コード例 #1
0
ファイル: simple_tensor_rnn.py プロジェクト: PFCM/rnns
    def __call__(self, inputs, states, scope=None):
        """Compute one step of the cell from the current input and state.

        A 3-way weight tensor ``W`` is obtained from ``get_tt_3_tensor`` and
        combined with the input and state vectors through
        ``bilinear_product_tt_3``.  When ``self._separate_pad`` is set, an
        explicit affine term ``inputs . D + states . E + b`` is added to the
        product; otherwise a constant 1 is appended to each vector before the
        product is taken.

        Args:
            inputs: 2-D input tensor; its static first dimension is used as
                the batch size when padding with ones.
            states: previous state tensor.
            scope: optional variable scope; defaults to the class name.

        Returns:
            A pair ``(output, new_state)``; both entries are the same tensor.
        """
        with tf.variable_scope(
                scope or type(self).__name__,
                initializer=tf.random_normal_initializer(stddev=0.01)):
            # get the tensor
            if self._separate_pad:
                t_shape = [self._num_outputs,
                           self._num_outputs,
                           self._num_inputs]
                vec_a = inputs
                vec_b = states
            else:
                t_shape = [self._num_outputs+1,
                           self._num_outputs,
                           self._num_inputs+1]
                batch_size = inputs.get_shape()[0].value
                vec_a = tf.concat(
                    axis=1, values=[inputs, tf.ones([batch_size, 1])])
                # The second operand must be the padded *state*; padding the
                # inputs twice would leave the previous state unused.
                vec_b = tf.concat(
                    axis=1, values=[states, tf.ones([batch_size, 1])])
            # NOTE(review): the (vec_a, vec_b) order relative to the axes of
            # t_shape depends on bilinear_product_tt_3 -- confirm it matches.
            tensor = get_tt_3_tensor(t_shape, self._ranks, name='W')
            result = bilinear_product_tt_3(vec_a, tensor, vec_b)
            if self._separate_pad:
                # TODO possible weightnorm
                D = tf.get_variable('D', [self._num_inputs, self._num_outputs],
                                    initializer=tf.uniform_unit_scaling_initializer(1.2))
                E = tf.get_variable('E', [self._num_outputs, self._num_outputs],
                                    initializer=tf.uniform_unit_scaling_initializer(1.2))
                b = tf.get_variable('b', [self._num_outputs],
                                    initializer=tf.constant_initializer(0.0))
                z = tf.nn.bias_add(tf.matmul(inputs, D) + tf.matmul(states, E), b)
                result = result + z

            result = self._nonlin(result)
            return result, result
コード例 #2
0
ファイル: model.py プロジェクト: alexeyche/alexeyche-junk
    def __init__(
        self,
        num_units,
        activation=simple_act,
        input_weights_init=tf.uniform_unit_scaling_initializer(factor=1.0),
        recc_weights_init=tf.uniform_unit_scaling_initializer(factor=0.1),
        sigma=1.0,
        update_gate=True,
        dt=1.0
    ):
        """Store the cell configuration; no variables are created here.

        Args:
            num_units: stored as ``_num_units``.
            activation: stored as ``_activation``.
            input_weights_init: initializer kept for the input weights.
            recc_weights_init: initializer kept for the recurrent weights.
            sigma: stored as ``_sigma``; any falsy value is replaced by 1.0.
            update_gate: stored as ``_update_gate``.
            dt: stored as ``_dt``.
        """
        # Scalar configuration.
        self._num_units, self._activation, self._dt = num_units, activation, dt
        self._sigma = sigma or 1.0
        self._update_gate = update_gate

        # Parameter slots, all left unset by the constructor.
        self.W = self.U = self.bias = None
        self.W_u = self.U_u = self.bias_u = None
        self.W_s = self.U_s = self.bias_s = None
        self.sigma = None

        # Initializers kept for later use.
        self.input_weights_init = input_weights_init
        self.recc_weights_init = recc_weights_init

        self._sensitivity = False

        # Two independent, initially empty bookkeeping lists.
        self.states_info, self.update_info = [], []
コード例 #3
0
ファイル: tf.py プロジェクト: alexeyche/alexeyche-junk
 def _init_parameters(self):
     """Create whichever of the ``W``, ``F`` and ``R`` variables are still unset.

     ``weight_init_factor``, ``L`` and ``filters_num`` are not defined in this
     method; they are resolved from an enclosing or module scope.
     """
     def scaled_init(scale):
         # All three variables share the same initializer family.
         return tf.uniform_unit_scaling_initializer(factor=scale)

     if self.W is None:
         w_shape = [self._filters_num + self._num_units, self._num_units]
         self.W = vs.get_variable(
             "W", w_shape, initializer=scaled_init(weight_init_factor))
     if self.F is None:
         self.F = vs.get_variable(
             "F", [L, filters_num], initializer=scaled_init(weight_init_factor))
     if self.R is None:
         # R uses half of the common init factor.
         self.R = vs.get_variable(
             "R", [L, 1], initializer=scaled_init(weight_init_factor*0.5))
コード例 #4
0
ファイル: init_ops_test.py プロジェクト: DapengLan/tensorflow
 def testInitializerIdentical(self):
   """Same factor and same seed: ``identicaltest`` must report a match."""
   for use_gpu in [False, True]:
     # First the default factor, then an explicit factor of 1.5.
     for factor_args in [(), (1.5,)]:
       first = tf.uniform_unit_scaling_initializer(*factor_args, seed=1)
       second = tf.uniform_unit_scaling_initializer(*factor_args, seed=1)
       self.assertTrue(identicaltest(self, first, second, use_gpu))
コード例 #5
0
ファイル: init_ops_test.py プロジェクト: DapengLan/tensorflow
 def testInitializerDifferent(self):
   """A different seed or factor: ``identicaltest`` must report a mismatch."""
   for use_gpu in [False, True]:
     seed_one = tf.uniform_unit_scaling_initializer(seed=1)
     seed_two = tf.uniform_unit_scaling_initializer(seed=2)
     scaled = tf.uniform_unit_scaling_initializer(1.5, seed=1)
     # Every pair differs in either the seed or the factor.
     pairs = [(seed_one, seed_two), (seed_one, scaled), (seed_two, scaled)]
     for left, right in pairs:
       self.assertFalse(identicaltest(self, left, right, use_gpu))
コード例 #6
0
ファイル: init_ops_test.py プロジェクト: Nishant23/tensorflow
 def testInitializerIdentical(self):
   """Same factor, seed and dtype: ``identicaltest`` must report a match."""
   for dtype in [tf.float32, tf.float64]:
     # First the default factor, then an explicit factor of 1.5.
     for factor_args in [(), (1.5,)]:
       first = tf.uniform_unit_scaling_initializer(
           *factor_args, seed=1, dtype=dtype)
       second = tf.uniform_unit_scaling_initializer(
           *factor_args, seed=1, dtype=dtype)
       self.assertTrue(identicaltest(self, first, second))
コード例 #7
0
ファイル: init_ops_test.py プロジェクト: Nishant23/tensorflow
 def testInitializerDifferent(self):
   """A different seed or factor: ``identicaltest`` must report a mismatch."""
   for dtype in [tf.float32, tf.float64]:
     seed_one = tf.uniform_unit_scaling_initializer(seed=1, dtype=dtype)
     seed_two = tf.uniform_unit_scaling_initializer(seed=2, dtype=dtype)
     scaled = tf.uniform_unit_scaling_initializer(1.5, seed=1, dtype=dtype)
     # Every pair differs in either the seed or the factor.
     pairs = [(seed_one, seed_two), (seed_one, scaled), (seed_two, scaled)]
     for left, right in pairs:
       self.assertFalse(identicaltest(self, left, right))
コード例 #8
0
ファイル: model_utils.py プロジェクト: IgorWang/RNNLM
def sharded_variable(name, shape, num_shards, dtype=tf.float32, transposed=False):
    """Create ``num_shards`` variables that split ``shape`` along its first axis.

    Each shard has shape ``[ceil(shape[0] / num_shards), shape[1]]``, so the
    shards together may hold more rows than ``shape[0]``.

    Args:
        name: base variable name; shard ``i`` is named ``name + '_i'``.
        shape: two-element shape of the full (unsharded) matrix.
        num_shards: number of variables to create.
        dtype: dtype of the variables and of their initializer.
        transposed: accepted for interface compatibility; both settings
            produce the same initializer.

    Returns:
        A list of ``num_shards`` variables.
    """
    # Integer ceiling division; avoids the round trip through float.
    shard_size = int((shape[0] + num_shards - 1) // num_shards)
    initializer = tf.uniform_unit_scaling_initializer(dtype=dtype)
    return [tf.get_variable(name + '_%d' % i, [shard_size, shape[1]],
                            initializer=initializer, dtype=dtype)
            for i in range(num_shards)]
コード例 #9
0
ファイル: FCN.py プロジェクト: 24hours/tf_fcn
def make_variable(name, shape, initializer, weight_decay=None, lr_mult=1, decay_mult=1):
    """Create a variable and register it in a collection keyed by ``lr_mult``.

    Args:
        name: variable name.
        shape: variable shape.
        initializer: used only when ``lr_mult == 0``.
        weight_decay: if given (and ``lr_mult != 0``), an L2 regularizer with
            scale ``weight_decay * decay_mult`` is attached.
        lr_mult: 0 makes the variable non-trainable; a positive value adds the
            variable to the collection named ``str(lr_mult)``.
        decay_mult: multiplier applied to ``weight_decay``.

    Returns:
        The created variable.
    """
    if lr_mult == 0:
        options = dict(initializer=initializer, trainable=False)
    else:
        # NOTE(review): the caller-supplied `initializer` is ignored on this
        # path and a default unit-scaling initializer is used instead --
        # confirm this is intended.
        options = dict(initializer=tf.uniform_unit_scaling_initializer())
        if weight_decay is not None:
            options['regularizer'] = tf.contrib.layers.l2_regularizer(
                weight_decay*decay_mult)
    var = tf.get_variable(name, shape, **options)

    if lr_mult > 0:
        tf.add_to_collection(str(lr_mult), var)

    return var
コード例 #10
0
ファイル: lca_tf.py プロジェクト: alexeyche/alexeyche-junk
    def _init_parameters(self):
        """Create and return the filter variable ``F``.

        ``F`` has shape ``[filter_size, input_size, layer_size]``.
        """
        return tf.get_variable(
            "F", [self._filter_size, self._input_size, self._layer_size],
            initializer=tf.uniform_unit_scaling_initializer(
                factor=c.weight_init_factor))

    def __call__(self, input, state, scope=None):
        """Advance the cell by one step.

        Args:
            input: tensor convolved with the filters ``F``.
            state: pair ``(u, a)`` holding the previous step's values.
            scope: unused.

        Returns:
            ``((new_u, new_a), (new_u, new_a))`` -- the same pair serves as
            both the output and the next state.
        """
        # The filter variable is created lazily on first use.
        if self._params is None:
            self._params = self._init_parameters()

        x = input
        u, a = state
        F = self._params

        # NOTE(review): tf.nn.conv1d requires a padding argument; "SAME" is
        # used so that b and fb keep the same length as u -- confirm.
        b = tf.nn.conv1d(x, F, 1, padding="SAME")
        # Per-tap product F^T F, shape [filter_size, layer_size, layer_size].
        Fc = tf.matmul(tf.transpose(F, (0, 2, 1)), F)
        fb = tf.nn.conv1d(a, Fc, 1, padding="SAME")

        # Debug output of the static shapes (same text on Python 2 and 3).
        print("b %s" % (b.get_shape(),))
        print("Fc %s" % (Fc.get_shape(),))
        print("fb %s" % (fb.get_shape(),))

        du = - u + b - fb
        new_u = u + c.epsilon * du / c.tau

        new_a = tf.nn.relu(new_u - c.lam)

        return (new_u, new_a), (new_u, new_a)
コード例 #11
0
  def testTransformerAutoencoder(self):
    """Build ``transformer_autoencoder`` in TRAIN mode and check its outputs.

    Verifies the set of loss keys, the shapes of the decoder output and of
    the two evaluated losses, that both losses are non-negative, and that
    the returned cache equals None.
    """
    hparams = imagetransformer_latent_tiny()
    hparams.mode = tf.estimator.ModeKeys.TRAIN

    # Shape of the `means` variable handed to the bottleneck.
    block_dim = int(hparams.hidden_size // hparams.num_blocks)
    bits_per_block = hparams.bottleneck_bits / (
        hparams.num_residuals * hparams.num_blocks)
    block_v_size = int(2**bits_per_block)
    means_shape = [hparams.num_residuals, hparams.num_blocks,
                   block_v_size, block_dim]
    means = tf.get_variable(
        name="means",
        shape=means_shape,
        initializer=tf.uniform_unit_scaling_initializer())

    bottleneck_kwargs = dict(
        hidden_size=hparams.hidden_size,
        z_size=hparams.bottleneck_bits,
        filter_size=hparams.filter_size,
        startup_steps=hparams.startup_steps,
        bottleneck_kind=hparams.bottleneck_kind,
        num_blocks=hparams.num_blocks,
        num_residuals=hparams.num_residuals,
        reshape_method=hparams.reshape_method,
        beta=hparams.vq_beta,
        decay=hparams.vq_decay,
        soft_em=hparams.soft_em,
        num_samples=hparams.num_samples,
        epsilon=hparams.vq_epsilon,
        ema=hparams.ema,
        means=means)
    hparams.bottleneck = functools.partial(
        discretization.discrete_bottleneck, **bottleneck_kwargs)

    # Only targets are supplied; inputs and target_space_id stay None.
    inputs = None
    batch_size = hparams.batch_size
    targets_shape = [batch_size, hparams.img_len, hparams.img_len,
                     hparams.hidden_size]
    targets = tf.random_uniform(targets_shape, minval=-1., maxval=1.)
    target_space_id = None

    tf.train.create_global_step()
    decoder_output, losses, cache = latent_layers.transformer_autoencoder(
        inputs, targets, target_space_id, hparams)

    expected_keys = {"extra", "extra_loss", "latent_pred"}
    self.assertEqual(set(six.iterkeys(losses)), expected_keys)

    self.evaluate(tf.global_variables_initializer())
    fetches = [decoder_output, losses["extra_loss"], losses["latent_pred"]]
    decoder_output_, extra_loss_, latent_pred_ = self.evaluate(fetches)

    expected_output_shape = (batch_size, hparams.img_len, hparams.img_len,
                             hparams.hidden_size)
    self.assertEqual(decoder_output_.shape, expected_output_shape)
    for loss_value in (extra_loss_, latent_pred_):
      self.assertEqual(loss_value.shape, (batch_size,))
    for loss_value in (extra_loss_, latent_pred_):
      self.assertAllGreaterEqual(loss_value, 0.)
    self.assertEqual(cache, None)
コード例 #12
0
ファイル: additive_tensor_rnn.py プロジェクト: PFCM/rnns
    def __call__(self, inputs, states, scope=None):
        """Compute one step of the additive cell.

        The step adds three terms: a bilinear combination of ``inputs`` and
        ``states`` through the tensor ``W`` (from ``get_cp_tensor``), a linear
        projection of ``inputs`` and a bias.  Layer normalisation is applied
        before or after the nonlinearity when ``self.layernorm`` is ``'pre'``
        or ``'post'``.  The previous state is then added to the result.

        Args:
            inputs: input tensor for this step.
            states: previous state tensor.
            scope: optional variable scope; defaults to the class name.

        Returns:
            A pair ``(output, new_state)``; both entries are the same tensor.
        """
        with tf.variable_scope(scope or type(self).__name__):
            # The tensor lives in its own sub-scope so it gets a dedicated
            # default initializer; reuse is inherited from the outer scope.
            tensor_shape = [self.input_size, self.output_size, self.state_size]
            with tf.variable_scope('tensor',
                                   initializer=init.orthonormal_init(0.5)):
                tensor = get_cp_tensor(tensor_shape,
                                       self.rank,
                                       'W',
                                       weightnorm=False,
                                       trainable=True)
            combination = bilinear_product_cp(inputs, tensor, states)

            # Linear projection of the input.
            input_weights = tf.get_variable(
                'U',
                shape=[self.input_size, self._input_projection],
                initializer=tf.uniform_unit_scaling_initializer(1.4))
            input_proj = tf.matmul(inputs, input_weights)

            # Bias applied before the nonlinearity.
            bias = tf.get_variable('b', shape=[self.output_size],
                                   initializer=tf.constant_initializer(0.0))
            activations = combination + input_proj + bias
            if self.layernorm == 'pre':
                activations = layer_normalise(activations)

            result = self._nonlinearity(activations)
            if self.layernorm == 'post':
                result = layer_normalise(result)

            # Add the previous state back in.
            result = result + states
        return result, result
コード例 #13
0
ファイル: fc.py プロジェクト: Paseam/tensorpack
def FullyConnected(x, out_dim,
                   W_init=None, b_init=None,
                   nl=tf.nn.relu, use_bias=True):
    """
    Fully-Connected layer.

    :param x: a tensor; it is passed through `batch_flatten` first.
    :param out_dim: output dimension
    :param W_init: initializer for W. default to
        `tf.uniform_unit_scaling_initializer(factor=1.43)`.
    :param b_init: initializer for b. default to `tf.constant_initializer()`.
    :param nl: nonlinearity, called as `nl(prod, name='output')`.
        default to `relu`.
    :param use_bias: whether to use bias. a boolean default to True
    :returns: the result of applying `nl` to the affine (or linear) product
    """
    flat = batch_flatten(x)
    in_dim = flat.get_shape().as_list()[1]

    W_init = (tf.uniform_unit_scaling_initializer(factor=1.43)
              if W_init is None else W_init)
    b_init = tf.constant_initializer() if b_init is None else b_init

    W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
    if use_bias:
        b = tf.get_variable('b', [out_dim], initializer=b_init)
        prod = tf.nn.xw_plus_b(flat, W, b)
    else:
        prod = tf.matmul(flat, W)
    return nl(prod, name='output')
コード例 #14
0
ファイル: rl_train.py プロジェクト: windweller/nlc
def setup_loss_critic(critic):
    """Attach the target placeholder, loss and update op to ``critic``.

    Sets ``target_qt``, ``q_loss``, ``gradient_norm``, ``param_norm`` and
    ``updates`` on ``critic``.  Everything is built inside the ``"rl"``
    variable scope.
    """
    # Starting point is critic.outputs (after the logistic layer),
    # laid out as [T, batch_size, vocab_size].
    scope_init = tf.uniform_unit_scaling_initializer(1.0)
    with tf.variable_scope("rl", initializer=scope_init):
        # Target scores; the two leading dimensions are left unspecified.
        critic.target_qt = tf.placeholder(
            tf.float32,
            shape=[None, None, critic.vocab_size],
            name="q_action_score")

        # Mean squared error against the targets.
        # Note: not adding lambda*C yet (variance).
        squared_error = tf.square(critic.outputs - critic.target_qt)
        critic.q_loss = tf.reduce_mean(squared_error)

        make_optimizer = nlc_model.get_optimizer(FLAGS.optimizer)
        opt = make_optimizer(critic.learning_rate)

        # tf.trainable_variables() is called without a scope argument.
        params = tf.trainable_variables()
        gradients = tf.gradients(critic.q_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, FLAGS.max_gradient_norm)
        # The reported norm is that of the unclipped gradients.
        critic.gradient_norm = tf.global_norm(gradients)
        critic.param_norm = tf.global_norm(params)
        critic.updates = opt.apply_gradients(
            zip(clipped_gradients, params), global_step=critic.global_step)
コード例 #15
0
ファイル: embedders.py プロジェクト: 812864539/models
 def _fully_connected(self, x, out_dim):
   """Flatten ``x`` to ``[batch_size, -1]`` and apply an affine layer.

   Creates the variables ``DW`` (weights) and ``biases`` and returns
   ``x . DW + biases``.
   """
   flat = tf.reshape(x, [self._params.batch_size, -1])
   weight_shape = [flat.get_shape()[1], out_dim]
   w = tf.get_variable(
       'DW', weight_shape,
       initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
   b = tf.get_variable(
       'biases', [out_dim], initializer=tf.constant_initializer())
   return tf.nn.xw_plus_b(flat, w, b)
コード例 #16
0
 def __init__(self, FLAGS):
     """Create the decoder variables ``W`` and ``b`` in the "decoder" scope.

     Reads ``state_size`` and ``maxSentenceLength`` from ``FLAGS``.
     """
     # Q: we can use an LSTM in the decoder too, but it may be a better idea
     # not to increase the number of parameters too much
     self.state_size = FLAGS.state_size
     self.maxSentenceLength = FLAGS.maxSentenceLength
     xavier = tf.contrib.layers.xavier_initializer()
     with vs.variable_scope("decoder", initializer=xavier):
         # W falls back to the scope's default initializer.
         self.W = tf.get_variable(
             "W", shape=(self.state_size, 1), dtype=tf.float64)
         # b overrides the scope default with a unit-scaling initializer.
         self.b = tf.get_variable(
             "b", shape=(1,), dtype=tf.float64,
             initializer=tf.uniform_unit_scaling_initializer(1.0))
コード例 #17
0
 def _fully_connected(self, x, out_dim, name=''):
     """Flatten ``x`` to ``[batch_size, -1]`` and apply an affine layer.

     The variables are created inside the variable scope ``name`` and are
     additionally prefixed with ``name`` (``name + 'DW'``, ``name + 'biases'``).
     """
     with tf.variable_scope(name):
         flat = tf.reshape(x, [self._batch_size, -1])
         weight_shape = [flat.get_shape()[1], out_dim]
         w = tf.get_variable(
             name+'DW', weight_shape,
             initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
         b = tf.get_variable(
             name+'biases', [out_dim],
             initializer=tf.constant_initializer())
         return tf.nn.xw_plus_b(flat, w, b)
コード例 #18
0
ファイル: resnet_model.py プロジェクト: Npccc/Study
 def _fully_connected(self, x, out_dim):
   """Flatten ``x`` to ``[batch_size, -1]`` and return ``x . DW + biases``."""
   # Turn the input into a 2-D tensor of size [N, -1].
   flat = tf.reshape(x, [self.hps.batch_size, -1])
   # Weights w: unit-scaling uniform initialization with factor 1.0.
   weight_shape = [flat.get_shape()[1], out_dim]
   w = tf.get_variable(
       'DW', weight_shape,
       initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
   # Bias b: created with the default constant initializer.
   b = tf.get_variable(
       'biases', [out_dim], initializer=tf.constant_initializer())
   # Compute x*w + b.
   return tf.nn.xw_plus_b(flat, w, b)
コード例 #19
0
ファイル: simple_tensor_rnn.py プロジェクト: PFCM/rnns
    def __call__(self, inputs, states, scope=None):
        """Compute one step of the cell from the current input and state.

        A 3-way tensor ``W`` is obtained from ``get_cp_tensor`` and combined
        with the state (first operand) and input (second operand) through
        ``bilinear_product_cp``.  With ``self._separate_pad`` the vectors are
        used as they are and an explicit affine term (recurrent weights,
        input weights and bias) is added; otherwise a constant 1 is appended
        to each vector before the product.

        Args:
            inputs: input tensor; its static first dimension is used as the
                batch size when padding with ones.
            states: previous state tensor.
            scope: optional variable scope; defaults to the class name.

        Returns:
            A pair ``(output, new_state)``; both entries are the same tensor.
        """
        scope_name = scope or type(self).__name__
        with tf.variable_scope(scope_name,
                               initializer=init.spectral_normalised_init(0.5)):
            # Pick the tensor shape and the two operands of the product.
            if self._separate_pad:
                shape = [self._num_units, self._num_units, self._num_inputs]
                vec_a = states
                vec_b = inputs
            else:
                shape = [self._num_units+1,
                         self._num_units,
                         self._num_inputs+1]
                batch_size = inputs.get_shape()[0].value
                vec_b = tf.concat(
                    axis=1, values=[inputs, tf.ones([batch_size, 1])])
                vec_a = tf.concat(
                    axis=1, values=[states, tf.ones([batch_size, 1])])

            tensor = get_cp_tensor(shape,
                                   self._rank,
                                   'W',
                                   weightnorm=self._weightnorm)
            result = bilinear_product_cp(vec_a, tensor, vec_b)

            if self._separate_pad:
                # TODO: use the new handy things
                in_shape = [self._num_inputs, self._num_units]
                rec_shape = [self._num_units, self._num_units]
                if self._weightnorm:
                    in_weights = get_weightnormed_matrix(
                        in_shape, name='input_weights')
                    rec_weights = get_weightnormed_matrix(
                        rec_shape,
                        name='recurrent_weights',
                        V_init=init.identity_initializer())
                else:
                    in_weights = tf.get_variable(
                        'input_weights', in_shape, tf.float32,
                        initializer=tf.uniform_unit_scaling_initializer())
                    rec_weights = tf.get_variable(
                        'recurrent_weights', rec_shape, tf.float32,
                        initializer=init.identity_initializer())
                bias = tf.get_variable(
                    'bias', [self._num_units],
                    initializer=tf.constant_initializer(0.0))
                affine = tf.nn.bias_add(
                    tf.matmul(vec_a, rec_weights) + tf.matmul(vec_b, in_weights),
                    bias)
                result += affine

            result = self._nonlinearity(result)
            return result, result
コード例 #20
0
ファイル: model.py プロジェクト: hujiewang/research
 def getModel(input):
     """Build a two-layer sigmoid network and return the second layer's output.

     The weights multiply ``input`` from the left, so the first dimension of
     ``input`` is the feature dimension.  ``self`` is taken from the
     enclosing scope; ``self._test`` is set to ``W_1`` as a side effect.
     """
     # 2-layer NN
     scope_init = tf.uniform_unit_scaling_initializer(factor=1.15)
     with tf.variable_scope("NN", initializer=scope_init):
         n_hidden_1 = self.config.num_hidden_1
         n_hidden_2 = self.config.num_hidden_2

         W_1 = tf.get_variable("W_1", [n_hidden_1, input.get_shape()[0]])
         self._test = W_1
         b_1 = tf.get_variable("b_1", [n_hidden_1, 1])
         W_2 = tf.get_variable("W_2", [n_hidden_2, n_hidden_1])
         b_2 = tf.get_variable("b_2", [n_hidden_2, 1])

         hidden = tf.sigmoid(tf.matmul(W_1, input) + b_1)
         output = tf.sigmoid(tf.matmul(W_2, hidden) + b_2)
     return output
コード例 #21
0
ファイル: vqvae.py プロジェクト: ccchang0111/sonnet
  def __init__(self, embedding_dim, num_embeddings, commitment_cost,
               name='vq_layer'):
    """Store the layer configuration and create the embedding variable.

    Args:
      embedding_dim: first dimension of the embedding variable.
      num_embeddings: second dimension of the embedding variable.
      commitment_cost: stored as ``_commitment_cost``.
      name: module name passed to the base class.
    """
    super(VectorQuantizer, self).__init__(name=name)
    self._commitment_cost = commitment_cost
    self._num_embeddings = num_embeddings
    self._embedding_dim = embedding_dim

    with self._enter_variable_scope():
      # Trainable variable of shape [embedding_dim, num_embeddings].
      self._w = tf.get_variable(
          'embedding', [embedding_dim, num_embeddings],
          initializer=tf.uniform_unit_scaling_initializer(),
          trainable=True)
コード例 #22
0
def fc(inputs, w_shape, b_shape):
    """Affine layer: returns ``inputs . weights + bias``.

    Args:
        inputs: tensor multiplied by the weight matrix.
        w_shape: shape of the ``"weights"`` variable.
        b_shape: shape of the ``"bias"`` variable.

    Returns:
        The matrix product of ``inputs`` and the weights, plus the bias.
    """
    w = tf.get_variable(
        "weights",
        w_shape,
        initializer=tf.truncated_normal_initializer(dtype=tf.float32, stddev=0.36),
        regularizer=tf.nn.l2_loss)
    b = tf.get_variable(
        "bias",
        b_shape,
        initializer=tf.uniform_unit_scaling_initializer(factor=0.1, seed=10, dtype=tf.float32))
    # The bias used to be created but never applied; add it to the output.
    return tf.matmul(inputs, w) + b
コード例 #23
0
ファイル: main.py プロジェクト: leconteur/CaptionImaging
def init_models(eval_config, rnn_config, sess):
    """Build the train/valid/test/generation models and supporting objects.

    All four ``MultiModal`` instances are created under the variable scope
    ``"model"``; the first creates the variables and the other three are built
    with ``reuse=True``.

    Args:
        eval_config: configuration for the test and image-generation models.
        rnn_config: configuration for the training and validation models.
        sess: session passed to ``load_alexnet`` and used when restoring a
            checkpoint.

    Returns:
        The tuple ``(global_step_tensor, m, merged, mtest, mvalid, mgen,
        image_gen, image_train, saver, writer)``.
    """
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    print('Creating rnn model')
    # When restoring, no scope-level default initializer is set.
    if flags.FLAGS.restore:
        initializer = None
    else:
        initializer = tf.uniform_unit_scaling_initializer()
    # First pass: creates the variables (reuse=None).
    with tf.variable_scope("model", reuse=None, initializer=initializer):
        train_image_tensor = tf.placeholder(np.float32,
                                            (rnn_config.batch_size, rnn_config.image_size,
                                             rnn_config.image_size, 3), 'input_image')
        m = MultiModal(is_training=True,
                       config=rnn_config,
                       image_tensor=train_image_tensor,
                       global_step_tensor=global_step_tensor)
        m.load_alexnet('models/alexnet_weights.npy', sess)
        # NOTE(review): variables_to_save is not used again in this function;
        # the Saver below is constructed without arguments.
        variables_to_save = tf.trainable_variables() + [global_step_tensor]
        #print(variables_to_save)
    # Second pass: every model below is built with reuse=True.
    with tf.variable_scope("model", reuse=True, initializer=initializer):
        mvalid = MultiModal(is_training=False,
                            config=rnn_config,
                            image_tensor=train_image_tensor,
                            global_step_tensor=global_step_tensor)
        test_image_tensor = tf.placeholder(np.float32,
                                           (eval_config.batch_size, eval_config.image_size,
                                            eval_config.image_size, 3), 'test_input_image')
        mtest = MultiModal(is_training=False,
                           config=eval_config,
                           image_tensor=test_image_tensor,
                           global_step_tensor=global_step_tensor)

        # Trainable image batch: all zeros except the first entry, which is
        # loaded from disk.
        initial_value = np.zeros((eval_config.batch_size, eval_config.image_size, eval_config.image_size, 3)).astype(np.float32)
        initial_value[0,:,:,:] = skimage.img_as_float(skimage.io.imread('data/test_image.jpg'))
        image_gen = tf.Variable(initial_value, trainable=True)
        mgen = MultiModal(is_training=False,
                          config=eval_config,
                          image_tensor=image_gen,
                          global_step_tensor=global_step_tensor)

        # Optimise the image itself: gradients of mgen.cost are taken with
        # respect to image_gen only.
        gradients = tf.gradients(mgen.cost, [image_gen])
        print(gradients)
        optimizer = tf.train.AdamOptimizer(0.1)
        image_train = optimizer.apply_gradients(zip(gradients, [image_gen]))

    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("logs/")
    saver = tf.train.Saver()
    # NOTE(review): .run() without a session argument relies on a default
    # session being active -- confirm the caller provides one.
    tf.initialize_all_variables().run()
    # Restoring happens after initialization, so checkpoint values win.
    if flags.FLAGS.restore:
        checkpoint = tf.train.latest_checkpoint(os.path.abspath('ckpts/'))
        if checkpoint:
            print('Restoring from checkpoint: {}'.format(checkpoint))
            saver.restore(sess, checkpoint)
    return global_step_tensor, m, merged, mtest, mvalid, mgen, image_gen, image_train, saver, writer
コード例 #24
0
ファイル: masked.py プロジェクト: QianQQ/Voice-Conversion
def conv1d_log(x,
           num_filters,
           filter_length,
           name,
           dilation=1,
           causal=True,
           kernel_initializer=tf.uniform_unit_scaling_initializer(1.0),
           biases_initializer=tf.constant_initializer(0.0)):
  """Dilated 1D convolution that also reports weight and bias means.

  The convolution is carried out as a 2D convolution over a height-1 image,
  after folding the dilation into the batch dimension with `time_to_batch`.

  Args:
    x: The [mb, time, channels] float tensor that we convolve.
    num_filters: The number of filter maps in the convolution.
    filter_length: The integer length of the filter.
    name: The name of the scope for the variables.
    dilation: The amount of dilation; must divide the time dimension.
    causal: Whether or not this is a causal convolution.
    kernel_initializer: The kernel initialization function.
    biases_initializer: The biases initialization function.
  Returns:
    y: The output of the 1D convolution, [mb, time, num_filters].
    W_mean: Scalar mean of the kernel variable.
    biases_mean: Scalar mean of the biases variable.
  """
  batch_size, length, num_input_channels = x.get_shape().as_list()
  assert length % dilation == 0

  # Causal mode pads explicitly on the left below, hence 'VALID'.
  if causal:
    padding = 'VALID'
  else:
    padding = 'SAME'

  with tf.variable_scope(name):
    weights = tf.get_variable(
        'W',
        shape=[1, filter_length, num_input_channels, num_filters],
        initializer=kernel_initializer)
    biases = tf.get_variable(
        'biases', shape=[num_filters], initializer=biases_initializer)

  x_ttb = time_to_batch(x, dilation)
  if causal and filter_length > 1:
    x_ttb = tf.pad(x_ttb, [[0, 0], [filter_length - 1, 0], [0, 0]])

  W_mean = tf.reduce_mean(weights)
  biases_mean = tf.reduce_mean(biases)

  # Insert a height-1 axis so conv2d can be used.
  folded_shape = x_ttb.get_shape().as_list()
  x_4d = tf.reshape(
      x_ttb, [folded_shape[0], 1, folded_shape[1], num_input_channels])

  conv = tf.nn.conv2d(x_4d, weights, [1, 1, 1, 1], padding=padding)
  conv = tf.nn.bias_add(conv, biases)

  # Drop the height-1 axis again and undo the dilation folding.
  conv_shape = conv.get_shape().as_list()
  y = tf.reshape(conv, [conv_shape[0], conv_shape[2], num_filters])
  y = batch_to_time(y, dilation)
  y.set_shape([batch_size, length, num_filters])
  return y, W_mean, biases_mean
コード例 #25
0
 def __init__(self, FLAGS):
     """Create the classifier variables ``U`` and ``b`` in the "classifier" scope.

     Reads ``numClasses`` and ``maxSentenceLength`` from ``FLAGS``.  Both
     variables are created with a trailing dimension of 1, not ``numClasses``.
     """
     self.numClasses = FLAGS.numClasses
     self.maxSentenceLength = FLAGS.maxSentenceLength
     xavier = tf.contrib.layers.xavier_initializer()
     with vs.variable_scope("classifier", initializer=xavier):
         # U falls back to the scope's default initializer.
         self.U = tf.get_variable(
             "U", shape=(self.maxSentenceLength, 1), dtype=tf.float64)
         # b overrides the scope default with a unit-scaling initializer.
         self.b = tf.get_variable(
             "b", shape=(1,), dtype=tf.float64,
             initializer=tf.uniform_unit_scaling_initializer(1.0))
コード例 #26
0
 def _fully_connected(self, x, out_dim):
   """FullyConnected layer for final output.

   The batch dimension is read dynamically with ``tf.shape``; the flattened
   feature size is computed from the static shape of ``x``.
   """
   rank = len(x.shape)
   # Product of all static dimensions except the leading (batch) one.
   flat_features = 1
   for axis in range(1, rank):
     flat_features *= int(x.shape[axis])
   flat = tf.reshape(x, [tf.shape(x)[0], -1])
   w = tf.get_variable(
       'DW', [flat_features, out_dim],
       initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
   b = tf.get_variable(
       'biases', [out_dim], initializer=tf.constant_initializer())
   return tf.nn.xw_plus_b(flat, w, b)
コード例 #27
0
    def _fully_connected(self, x, out_dim):
        """FullyConnected layer for final output.

        ``x`` is used without reshaping; its second static dimension gives
        the weight matrix's input size.  The input, weights and biases are
        also stored on the instance as ``fc_x``, ``fc_w`` and ``fc_b``.
        """
        weight_shape = [x.get_shape()[1], out_dim]
        w = tf.get_variable(
            'DW', weight_shape,
            initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
        b = tf.get_variable(
            'biases', [out_dim], initializer=tf.constant_initializer())

        # Keep references to the layer's tensors on the instance.
        self.fc_x, self.fc_w, self.fc_b = x, w, b

        return tf.nn.xw_plus_b(x, w, b)
コード例 #28
0
 def _fc_layer(self, input_tensor, n_out, n_in=None, activation=tf.identity):
     """
     The fully connected layer
     :param input_tensor: tensor multiplied by the weight matrix
     :param n_out: int, the number of output units
     :param n_in: int, the number of input units; when None it is read from
         the last static dimension of input_tensor
     :param activation: activation function, identity by default
     :return: activation(input_tensor . weights + biases)
     """
     in_units = n_in
     if in_units is None:
         in_units = input_tensor.get_shape().as_list()[-1]
     weights = self._get_variable(
         "fc_weight", [in_units, n_out],
         initializer=tf.uniform_unit_scaling_initializer(factor=1.0),
         is_fc_layer=True)
     biases = self._get_variable(
         "fc_bias", [n_out,],
         initializer=tf.zeros_initializer,
         is_fc_layer=True)
     return activation(tf.matmul(input_tensor, weights) + biases)
コード例 #29
0
ファイル: nlc_model.py プロジェクト: windweller/nlc
  def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size, learning_rate,
               learning_rate_decay_factor, dropout, FLAGS, forward_only=False, optimizer="adam"):
    """Build the model graph: placeholders, sub-networks, loss and optimizer.

    Args:
      vocab_size: stored as ``self.vocab_size``.
      size: stored as ``self.size``; also the width of the per-layer decoder
        state placeholders.
      num_layers: number of decoder state placeholders to create.
      max_gradient_norm: global-norm threshold for gradient clipping.
      batch_size: stored as ``self.batch_size``.
      learning_rate: initial value of the learning-rate variable.
      learning_rate_decay_factor: multiplier used by ``learning_rate_decay_op``.
      dropout: ``self.keep_prob_config`` is set to ``1.0 - dropout``.
      FLAGS: stored as ``self.FLAGS``; ``FLAGS.keep`` sets the saver's
        ``max_to_keep``.
      forward_only: when true, no gradient or update ops are built.
      optimizer: name passed to ``get_optimizer``.
    """
    self.size = size
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.num_layers = num_layers
    self.keep_prob_config = 1.0 - dropout

    # Non-trainable bookkeeping variables.
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    decayed_rate = self.learning_rate * learning_rate_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
    self.global_step = tf.Variable(0, trainable=False)

    # Feed points.
    self.keep_prob = tf.placeholder(tf.float32)
    self.source_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.target_tokens = tf.placeholder(tf.int32, shape=[None, None])
    self.source_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.target_mask = tf.placeholder(tf.int32, shape=[None, None])
    self.beam_size = tf.placeholder(tf.int32)
    self.target_length = tf.reduce_sum(self.target_mask, reduction_indices=0)

    self.FLAGS = FLAGS

    # One state placeholder per layer; the output list starts out empty.
    self.decoder_state_input = [
        tf.placeholder(tf.float32, shape=[None, size])
        for _ in xrange(num_layers)]
    self.decoder_state_output = []

    with tf.variable_scope("NLC", initializer=tf.uniform_unit_scaling_initializer(1.0)):
      self.setup_embeddings()
      self.setup_encoder()
      self.setup_decoder()
      self.setup_loss()

      self.setup_beam()

    params = tf.trainable_variables()
    if not forward_only:
      make_optimizer = get_optimizer(optimizer)
      opt = make_optimizer(self.learning_rate)

      gradients = tf.gradients(self.losses, params)
      clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
      # The reported norm is that of the unclipped gradients.
      self.gradient_norm = tf.global_norm(gradients)
      self.param_norm = tf.global_norm(params)
      self.updates = opt.apply_gradients(
        zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.keep)  # write_version=tf.train.SaverDef.V1
コード例 #30
0
def linear(args, output_size, bias, bias_start=0.0, scope=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  All inputs are concatenated along dimension 1 and multiplied by a single
  weight matrix named "Matrix"; an optional "Bias" vector is added.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not. Any falsy value
      (False, None, 0) disables the bias term.
    bias_start: starting value to initialize the bias; 0 by default.
    scope: VariableScope for the created subgraph; defaults to "Linear".

  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

  Raises:
    ValueError: if no argument is given, or if some of the arguments has
      unspecified or wrong shape.
  """
  if not isinstance(args, (list, tuple)):
    args = [args]
  # Checked after wrapping so that the truth value of a bare Tensor is never
  # evaluated.
  if not args:
    raise ValueError("Linear expects at least one argument")

  # Calculate the total size of arguments on dimension 1.
  shapes = [a.get_shape().as_list() for a in args]
  total_arg_size = 0
  for shape in shapes:
    if len(shape) != 2:
      raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
    if not shape[1]:
      raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
    total_arg_size += shape[1]

  # Now the computation.
  with tf.variable_scope(scope or "Linear"):
    matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                             initializer=tf.uniform_unit_scaling_initializer())
    if len(args) == 1:
      res = tf.matmul(args[0], matrix)
    else:
      # NOTE(review): (axis, values) is the pre-1.0 tf.concat argument order;
      # TF >= 1.0 expects (values, axis). Confirm the targeted TF version
      # before changing this call.
      res = tf.matmul(tf.concat(1, args), matrix)
    # The documented contract is boolean, so False must skip the bias just
    # like None does.
    if not bias:
      return res
    bias_term = tf.get_variable("Bias", [output_size],
                                initializer=tf.constant_initializer(bias_start))
  return res + bias_term
コード例 #31
0
    def __init__(self, encoder, *args):
        """
        Build the QA graph: placeholders, embeddings, predictions, loss,
        learning-rate schedule, train op and saver.

        :param encoder: an encoder that you constructed in train.py; stored
            on ``self.encoder``
        :param args: positional extras, read by index:
            ``args[0]`` is the config/flags object (``self.config``),
            ``args[1]`` is stored as ``self.pretrained_embeddings``,
            ``args[2]`` is stored as ``self.num_per_epoch`` and is divided
            by ``config.batch_size`` to get the batches per epoch
        """

        self.encoder = encoder

        # ==== set up placeholder tokens ========
        # TMP TO REMOVE START
        self.config = args[0]  # FLAG
        self.pretrained_embeddings = args[1]  # embeddings
        self.num_per_epoch = args[2]

        # self.saver = args[2]

        # max_question_length = 66
        # max_context_length = 35
        # embedding_size = 50
        # label_size = 2

        # TMP TO REMOVE END
        # Question / context inputs are 3-D:
        # (batch, max length, config.n_features).
        self.question_placeholder = tf.placeholder(
            tf.int64,
            (None, self.config.max_question_length, self.config.n_features))
        print(self.question_placeholder)
        self.question_length_placeholder = tf.placeholder(tf.int64, (None, ))
        self.context_placeholder = tf.placeholder(
            tf.int64,
            (None, self.config.max_context_length, self.config.n_features))
        self.context_length_placeholder = tf.placeholder(tf.int64, (None, ))

        self.start_labels_placeholder = tf.placeholder(tf.int64, (None, ))
        self.end_labels_placeholder = tf.placeholder(tf.int64, (None, ))
        self.mask_placeholder = tf.placeholder(
            tf.float32, (None, self.config.max_context_length))

        # ==== assemble pieces ====
        # NOTE(review): only setup_embeddings() runs inside the "qa" scope;
        # setup_system() and setup_loss() are called outside it and so do not
        # inherit its default initializer - confirm this is intended.
        with tf.variable_scope(
                "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
            self.setup_embeddings()
        # self.preds = self.setup_system()
        u_pred_s, u_pred_e = self.setup_system()
        self.preds = (self.exp_mask(u_pred_s), self.exp_mask(u_pred_e)
                      )  # mask the start and end predictions
        self.loss = self.setup_loss(self.preds)

        # ==== set up training/updating procedure ====
        optfn = get_optimizer(self.config.optimizer)

        self.global_step = tf.contrib.framework.get_or_create_global_step()
        num_batches_per_epoch = (self.num_per_epoch / self.config.batch_size)
        # Number of optimizer steps between two learning-rate decays.
        self.decay_steps = int(num_batches_per_epoch *
                               self.config.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        self.lr = tf.train.exponential_decay(
            self.config.learning_rate,
            self.global_step,
            self.decay_steps,
            self.config.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', self.lr)

        # Merge every summary registered in the SUMMARIES collection so far.
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        self.summary_op = tf.summary.merge(summaries)

        self.train_op = optfn(self.lr).minimize(self.loss,
                                                global_step=self.global_step)
        self.saver = tf.train.Saver()
コード例 #32
0
ファイル: qacnn.py プロジェクト: zr8091/InsuranceQA_zh
 def add_embeddings(self):
     """Create the 'embeddings' table and embed self.q, self.aplus and self.aminus.

     Returns a 3-tuple of looked-up tensors, in that order.
     """
     with tf.variable_scope('embedding'):
         table_shape = [self.config.vocab_size, self.config.embedding_size]
         table = tf.get_variable(
             'embeddings',
             shape=table_shape,
             initializer=tf.uniform_unit_scaling_initializer())
         # All three inputs share the same table.
         embedded_q = tf.nn.embedding_lookup(table, self.q)
         embedded_pos = tf.nn.embedding_lookup(table, self.aplus)
         embedded_neg = tf.nn.embedding_lookup(table, self.aminus)
         return embedded_q, embedded_pos, embedded_neg
コード例 #33
0
 def add_embeddings(self):
     """Embed self.q and self.a with a shared 'embeddings' table.

     When config.embeddings is provided it becomes a non-trainable variable;
     otherwise a fresh table of shape [vocab_size, embedding_size] is created.
     Returns the pair (embedded q, embedded a).
     """
     with tf.variable_scope('embedding'):
         if self.config.embeddings is None:
             table = tf.get_variable(
                 'embeddings',
                 shape=[self.config.vocab_size, self.config.embedding_size],
                 initializer=tf.uniform_unit_scaling_initializer())
         else:
             # Supplied values are kept fixed during training.
             table = tf.Variable(self.config.embeddings, name="embeddings", trainable=False)
         embedded_q = tf.nn.embedding_lookup(table, self.q)
         embedded_a = tf.nn.embedding_lookup(table, self.a)
         return embedded_q, embedded_a
コード例 #34
0
ファイル: qa_model.py プロジェクト: jeffrey1hu/qa_system
    def __init__(self, encoder, decoder, embed_path):
        """
        Build the QA graph: placeholders, model, loss, learning-rate schedule,
        value-clipped Adam train op, saver and merged summaries.

        :param encoder: an encoder that you constructed in train.py
        :param decoder: a decoder that you constructed in train.py
        :param embed_path: stored on ``self.embed_path``
        """
        self.encoder = encoder
        self.decoder = decoder
        self.embed_path = embed_path

        # ==== set up placeholder tokens ========
        # Token ids plus boolean masks, padded to the module-level
        # context_max_len / question_max_len.
        self.context = tf.placeholder(tf.int32, shape=(None, context_max_len))
        self.context_m = tf.placeholder(tf.bool, shape=(None, context_max_len))

        self.question = tf.placeholder(tf.int32,
                                       shape=(None, question_max_len))
        self.question_m = tf.placeholder(tf.bool,
                                         shape=(None, question_max_len))

        self.answer_s = tf.placeholder(tf.int32, shape=(None, ))
        self.answer_e = tf.placeholder(tf.int32, shape=(None, ))

        # ==== assemble pieces ====
        with tf.variable_scope(
                "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()

            # ==== set up training/updating procedure ====

            # The step counter starts at cfg.start_steps rather than 0.
            self.global_step = tf.Variable(cfg.start_steps, trainable=False)
            # The initial learning rate is fed at run time.
            self.starter_learning_rate = tf.placeholder(tf.float32,
                                                        name='start_lr')
            # Multiply the rate by 0.9 every 1000 steps (staircase).
            learning_rate = tf.train.exponential_decay(
                self.starter_learning_rate,
                self.global_step,
                1000,
                0.9,
                staircase=True)
            tf.summary.scalar('learning_rate', learning_rate)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

            # Alternative kept for reference: clip by global norm instead.
            # grad_var = self.optimizer.compute_gradients(self.final_loss)
            # grad = [i[0] for i in grad_var]
            # var = [i[1] for i in grad_var]
            # self.grad_norm = tf.global_norm(grad)
            # tf.summary.scalar('grad_norm', self.grad_norm)
            # grad, use_norm = tf.clip_by_global_norm(grad, max_grad_norm)
            #
            # self.train_op = self.optimizer.apply_gradients(zip(grad, var), global_step=self.global_step)

            # Clip every gradient element-wise to [-clip_by_val, clip_by_val].
            gradients = self.optimizer.compute_gradients(self.final_loss)
            capped_gvs = [(tf.clip_by_value(grad, -clip_by_val,
                                            clip_by_val), var)
                          for grad, var in gradients]
            # The reported norm is that of the unclipped gradients.
            grad = [x[0] for x in gradients]
            self.grad_norm = tf.global_norm(grad)
            tf.summary.scalar('grad_norm', self.grad_norm)
            self.train_op = self.optimizer.apply_gradients(
                capped_gvs, global_step=self.global_step)

            self.saver = tf.train.Saver()
            self.merged = tf.summary.merge_all()
コード例 #35
0
ファイル: gcnn.py プロジェクト: zhaoqiuye/DAAT-CWS
    def encoder(self,
                is_training=False,
                hidden_layers=5,
                kernel_size=3,
                channels=[200] * 5,
                dropout_emb=0.2,
                dropout_hidden=0.2,
                use_wn=True,
                use_bn=False):
        """Build a stack of gated 1-D convolution layers over ``self.inputs``.

        Each layer computes two 'SAME' convolutions ``w`` and ``v`` of the
        previous output and emits ``w * sigmoid(v)``, masked by the sequence
        lengths and optionally followed by dropout. The final layer output is
        stored on ``self.fc1``; nothing is returned.

        Args:
            is_training: not read by this method; the train/eval switch comes
                from ``self.is_train``.
            hidden_layers: number of gated convolution layers.
            kernel_size: filter width of every layer.
            channels: output channels per layer, indexed by layer number; it
                needs at least ``hidden_layers`` entries. The default list is
                only read, never mutated.
            dropout_emb: drop probability applied to ``self.inputs``; a falsy
                value disables it. When enabled, ``self.inputs`` is
                overwritten with the conditional dropout tensor.
            dropout_hidden: drop probability applied to each layer output; a
                falsy value disables it.
            use_wn: apply weight normalization to both filters.
            use_bn: apply batch normalization to both convolution outputs.
        """
        # Define the encoder
        # embeddings = tf.get_variable('embeddings', [self.vocab_size, self.emb_size])
        with tf.variable_scope(
                self.scope,
                reuse=self.reuse,
                initializer=tf.uniform_unit_scaling_initializer()):
            # The mask width is hard-coded to 64 time steps.
            masks = tf.cast(tf.sequence_mask(self.seq_lengths, maxlen=64),
                            FLOAT_TYPE)
            # Dropout on embedding output.
            if dropout_emb:
                self.inputs = tf.cond(
                    self.is_train,
                    lambda: tf.nn.dropout(self.inputs, 1 - dropout_emb),
                    lambda: self.inputs)
            hidden_output = self.inputs
            pre_channels = self.inputs.get_shape()[-1].value
            for i in range(hidden_layers):
                k = kernel_size
                cur_channels = channels[i]
                filter_w = tf.get_variable(
                    'filter_w_%d' % i,
                    shape=[k, pre_channels, cur_channels],
                    dtype=FLOAT_TYPE)
                filter_v = tf.get_variable(
                    'filter_v_%d' % i,
                    shape=[k, pre_channels, cur_channels],
                    dtype=FLOAT_TYPE)
                bias_b = tf.get_variable(
                    'bias_b_%d' % i,
                    shape=[cur_channels],
                    initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))
                bias_c = tf.get_variable(
                    'bias_c_%d' % i,
                    shape=[cur_channels],
                    initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))

                # Weight normalization: rescale each filter by a learned gain
                # divided by its L2 norm over the input-channel axis.
                if use_wn:
                    epsilon = 1e-12
                    g_w = tf.get_variable('g_w_%d' % i,
                                          shape=[k, 1, cur_channels],
                                          dtype=FLOAT_TYPE)
                    g_v = tf.get_variable('g_v_%d' % i,
                                          shape=[k, 1, cur_channels],
                                          dtype=FLOAT_TYPE)
                    # Perform wn
                    filter_w = g_w * filter_w / (tf.sqrt(
                        tf.reduce_sum(filter_w**2, 1, keep_dims=True)) +
                                                 epsilon)
                    filter_v = g_v * filter_v / (tf.sqrt(
                        tf.reduce_sum(filter_v**2, 1, keep_dims=True)) +
                                                 epsilon)

                w = tf.nn.conv1d(hidden_output, filter_w, 1, 'SAME') + bias_b
                v = tf.nn.conv1d(hidden_output, filter_v, 1, 'SAME') + bias_c

                if use_bn:
                    # Each branch is normalised from its own pre-activation;
                    # feeding v into the w branch would discard the w
                    # convolution entirely.
                    w = layers.batch_norm(inputs=w,
                                          decay=0.9,
                                          is_training=self.is_train,
                                          center=True,
                                          scale=True,
                                          scope='BatchNorm_w_%d' % i)
                    v = layers.batch_norm(inputs=v,
                                          decay=0.9,
                                          is_training=self.is_train,
                                          center=True,
                                          scale=True,
                                          scope='BatchNorm_v_%d' % i)

                # Gated linear unit: v gates w.
                hidden_output = w * tf.nn.sigmoid(v)

                # Mask paddings.
                hidden_output = hidden_output * tf.expand_dims(masks, -1)
                # Dropout on hidden output.
                if dropout_hidden:
                    hidden_output = tf.cond(
                        self.is_train, lambda: tf.nn.dropout(
                            hidden_output, 1 - dropout_hidden),
                        lambda: hidden_output)

                pre_channels = cur_channels

            self.fc1 = hidden_output
コード例 #36
0
 def init_embedding(self):
     """Create the user, item and aspect embedding tables in self.embedding.

     Each table is a float32 variable of shape (rows, self.dim), stored under
     its own variable name as the dictionary key.
     """
     self.embedding = {}
     # (variable name, attribute holding the row count), in creation order.
     # NOTE(review): the aspect table is sized by num_user, exactly like the
     # user table - confirm that is intended.
     table_specs = (
         ("user_embedding", "num_user"),
         ("item_embedding", "num_item"),
         ("aspect_embedding", "num_user"),
     )
     for table_name, rows_attr in table_specs:
         table_shape = (getattr(self, rows_attr), self.dim)
         self.embedding[table_name] = tf.get_variable(
             table_name,
             shape=table_shape,
             dtype=tf.float32,
             initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
コード例 #37
0
def create_model(hps, vocab_size, classes_size):
    """Build a hand-unrolled single-layer LSTM text classifier graph.

    Args:
        hps: hyper-parameter object. This function reads encoded_length,
            batch_size, embedding_size, num_lstm_nodes, num_fc_nodes,
            clip_lstm_grads and learning_rate from it.
        vocab_size: number of rows of the embedding matrix.
        classes_size: number of output logits.

    Returns:
        A 3-tuple of tuples:
        ((inputs, outputs, keep_prob), (loss, accuracy),
         (train_op, global_step)).
    """

    # Input definitions.
    encoded_length = hps.encoded_length
    batch_size = hps.batch_size

    inputs = tf.placeholder(tf.int32, (batch_size, encoded_length))
    outputs = tf.placeholder(tf.int32, (batch_size, ))

    # for drop_out
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Non-trainable counter recording how many training steps have run.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False
    )

    # embedding layer
    # initialize embedding layer with uniform-distribution from -1 to +1
    embedding_init = tf.random_uniform_initializer(-1.0, 1.)
    with tf.variable_scope('embedding', initializer=embedding_init):
        embedding = tf.get_variable(
            'embedding',
            [vocab_size, hps.embedding_size],   # size of embedding matrix
            tf.float32
        )
        embedded_inputs = tf.nn.embedding_lookup(embedding, inputs)

    # LSTM layers
    scale = 1.0/math.sqrt(hps.embedding_size + hps.num_lstm_nodes[-1])/3.0
    lstm_init = tf.random_uniform_initializer(-scale, scale)

    def _generate_params_for_lstm_cell(x_size, h_size, bias_size):
        """Create the parameters of one LSTM gate in the current scope.

        :param x_size: shape of the input-to-gate weight matrix
        :param h_size: shape of the hidden-to-gate weight matrix
        :param bias_size: shape of the zero-initialized bias
        :return: the tuple (x_weights, h_weights, bias)
        """
        x_w = tf.get_variable('x_weights', x_size)
        h_w = tf.get_variable('h_weights', h_size)
        b = tf.get_variable('bias', bias_size, initializer=tf.constant_initializer(0.0))
        return x_w, h_w, b

    # one LSTM layer
    with tf.variable_scope('lstm', initializer=lstm_init):
        # all params in the lstm cell:
        with tf.variable_scope('inputs'):
            ix_w, ih_w, ib = _generate_params_for_lstm_cell(
                x_size=[hps.embedding_size, hps.num_lstm_nodes[0]],
                h_size=[hps.num_lstm_nodes[0], hps.num_lstm_nodes[0]],
                bias_size=[1, hps.num_lstm_nodes[0]]
            )
        with tf.variable_scope('outputs'):
            ox_w, oh_w, ob = _generate_params_for_lstm_cell(
                x_size=[hps.embedding_size, hps.num_lstm_nodes[0]],
                h_size=[hps.num_lstm_nodes[0], hps.num_lstm_nodes[0]],
                bias_size=[1, hps.num_lstm_nodes[0]]
            )
        with tf.variable_scope('forget'):
            fx_w, fh_w, fb = _generate_params_for_lstm_cell(
                x_size=[hps.embedding_size, hps.num_lstm_nodes[0]],
                h_size=[hps.num_lstm_nodes[0], hps.num_lstm_nodes[0]],
                bias_size=[1, hps.num_lstm_nodes[0]]
            )
        # tanh
        with tf.variable_scope('memory'):
            cx_w, ch_w, cb = _generate_params_for_lstm_cell(
                x_size=[hps.embedding_size, hps.num_lstm_nodes[0]],
                h_size=[hps.num_lstm_nodes[0], hps.num_lstm_nodes[0]],
                bias_size=[1, hps.num_lstm_nodes[0]]
            )
        # Initial cell state and hidden state, both zeros.
        state = tf.Variable(
            tf.zeros([batch_size, hps.num_lstm_nodes[0]]),
            trainable=False
        )
        h = tf.Variable(
            tf.zeros([batch_size, hps.num_lstm_nodes[0]]),
            trainable=False
        )

        # Unroll the LSTM over the sequence; every step shares the same
        # gate parameters.
        for i in range(encoded_length):
            # Slice out time step i and pin its static shape.
            embedd_input = embedded_inputs[:, i, :]
            embedd_input = tf.reshape(embedd_input, [batch_size, hps.embedding_size])
            forget_gate = tf.sigmoid(
                tf.matmul(embedd_input, fx_w) + tf.matmul(h, fh_w) + fb)
            input_gate = tf.sigmoid(
                tf.matmul(embedd_input, ix_w) + tf.matmul(h, ih_w) + ib)
            mid_state = tf.tanh(
                tf.matmul(embedd_input, cx_w) + tf.matmul(h, ch_w) + cb)
            output_gate = tf.sigmoid(
                tf.matmul(embedd_input, ox_w) + tf.matmul(h, oh_w) + ob)
            # The cell state must be carried in the same name that the
            # hidden-state update reads, otherwise it never changes.
            state = mid_state * input_gate + state * forget_gate
            h = output_gate * tf.tanh(state)
        last = h  # hidden state after the final time step

    # fc layer
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(last, hps.num_fc_nodes, activation=tf.nn.relu, name='fc1')
        fc1_dropout = tf.contrib.layers.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout, classes_size, name='fc2')

    # calculate loss function, y_pred, accuracy
    with tf.name_scope('metrics'):
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=outputs
        )
        loss = tf.reduce_mean(softmax_loss)
        y_pred = tf.argmax(tf.nn.softmax(logits), 1, output_type=tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.name_scope('train_op'):
        # get all trainable variables
        trainable_vars = tf.trainable_variables()
        # show all these trainable variables
        for var in trainable_vars:
            print('variable name: %s' % var)
            # tf.logging.info('variable name: %s' % var)
        # Gradients of the loss w.r.t. every trainable variable, clipped by
        # their global norm.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, trainable_vars), hps.clip_lstm_grads
        )

        # use AdamOptimizer
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)

        # apply grads to all trainable_variables & train
        train_op = optimizer.apply_gradients(
            zip(grads, trainable_vars), global_step=global_step
        )

    return ((inputs, outputs, keep_prob),
            (loss, accuracy),
            (train_op, global_step))
コード例 #38
0
    def __init__(self, encoder, decoder, embed_path):
        """
        Build the QA graph: placeholders, model, loss, learning-rate schedule,
        value-clipped Adam train op, saver and merged summaries.

        :param encoder: an encoder that you constructed in train.py
        :param decoder: a decoder that you constructed in train.py
        :param embed_path: stored on ``self.embed_path``
        """
        # self.input_size = cfg.batch_size
        self.embed_path = embed_path
        self.max_grad_norm = cfg.max_grad_norm
        self.encoder = encoder
        self.decoder = decoder
        # ==== set up placeholder tokens ========
        # shape [batch_size, context_max_length]
        self.context = tf.placeholder(tf.int32, (None, context_max_len))
        self.context_m = tf.placeholder(tf.bool, (None, context_max_len))
        self.question = tf.placeholder(tf.int32, (None, question_max_len))
        self.question_m = tf.placeholder(tf.bool, (None, question_max_len))
        self.answer_s = tf.placeholder(tf.int32, (None, ))
        self.answer_e = tf.placeholder(tf.int32, (None, ))
        # self.batch_size = tf.placeholder(tf.int32,[], name='batch_size')

        # ==== assemble pieces ====
        with tf.variable_scope(
                "qa",
                initializer=tf.uniform_unit_scaling_initializer(1.0, ),
                # regularizer=self.regularizer
                # initializer=identity_initializer
        ):
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()

            # ==== set up training/updating procedure ====
            self.global_step = tf.Variable(0, trainable=False)
            # starter_learning_rate = start_lr
            # The initial learning rate is fed at run time.
            self.starter_learning_rate = tf.placeholder(tf.float32,
                                                        name='start_lr')
            # TODO: choose how to adapt learning rate at will
            # Multiply the rate by 0.96 every 1000 steps (staircase).
            learning_rate = tf.train.exponential_decay(
                self.starter_learning_rate,
                self.global_step,
                1000,
                0.96,
                staircase=True)
            tf.summary.scalar('learning_rate', learning_rate)
            # self.optimizer = get_optimizer(cfg.opt)
            self.optimizer = tf.train.AdamOptimizer(learning_rate)

            # Clip every gradient element-wise to [-clip_by_val, clip_by_val].
            gradients = self.optimizer.compute_gradients(self.final_loss)
            capped_gvs = [(tf.clip_by_value(grad, -clip_by_val,
                                            clip_by_val), var)
                          for grad, var in gradients]
            # The reported norm is that of the unclipped gradients.
            grad = [x[0] for x in gradients]
            self.grad_norm = tf.global_norm(grad)
            tf.summary.scalar('grad_norm', self.grad_norm)
            self.train_op = self.optimizer.apply_gradients(
                capped_gvs, global_step=self.global_step)
            # one could try clip_by_global_norm
            # var = [x[1] for x in gradients]
            # grad, self.grad_norm = tf.clip_by_global_norm(grad, self.max_grad_norm)
            # self.train_op = self.optimizer.apply_gradients(zip(grad, var), global_step=self.global_step)

            self.saver = tf.train.Saver()
            self.merged = tf.summary.merge_all()
コード例 #39
0
    def __init__(self, *args, **kwargs):
        """Configure the discretization bottleneck stored on the hparams.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor. Afterwards ``self._hparams.bottleneck`` is set to
        a partial of ``discretization.discrete_bottleneck`` pre-filled from
        the hparams. For the "dvq" and "gumbel-softmax-dvq" bottleneck kinds
        the codebook ("means"), the optional projection tensors and the
        optional EMA shadow variables are created and bound into that partial
        as well.

        Raises:
            ValueError: for a DVQ bottleneck whose ``reshape_method`` is
                neither "project" nor "slice".
        """
        super(TransformerAE, self).__init__(*args, **kwargs)
        self.predict_mask = 1.0

        # Define bottleneck function
        # NOTE(review): most values are read through self._hparams but a few
        # (num_residuals, soft_em, num_samples, startup_steps) go through
        # self.hparams - presumably the same object; confirm.
        self._hparams.bottleneck = functools.partial(
            discretization.discrete_bottleneck,
            hidden_size=self._hparams.hidden_size,
            z_size=self._hparams.z_size,
            filter_size=self._hparams.filter_size,
            bottleneck_kind=self._hparams.bottleneck_kind,
            num_blocks=self._hparams.num_blocks,
            num_residuals=self.hparams.num_residuals,
            reshape_method=self._hparams.reshape_method,
            beta=self._hparams.beta,
            ema=self._hparams.ema,
            epsilon=self._hparams.epsilon,
            decay=self._hparams.decay,
            random_top_k=self._hparams.random_top_k,
            soft_em=self.hparams.soft_em,
            num_samples=self.hparams.num_samples,
            softmax_k=self._hparams.softmax_k,
            temperature_warmup_steps=self._hparams.temperature_warmup_steps,
            do_hard_gumbel_softmax=self._hparams.do_hard_gumbel_softmax,
            num_flows=self._hparams.num_flows,
            approximate_gs_entropy=self._hparams.approximate_gs_entropy,
            discrete_mix=self._hparams.d_mix,
            noise_dev=self._hparams.noise_dev,
            startup_steps=self.hparams.startup_steps,
            summary=_DO_SUMMARIES)
        # Set the discretization bottleneck specific things here
        if self._hparams.bottleneck_kind in ["dvq", "gumbel-softmax-dvq"]:
            # Split the z bits across residuals, then across blocks; each
            # block gets a codebook of 2**(bits per block) entries.
            z_size_per_residual = self._hparams.z_size / self._hparams.num_residuals
            block_dim = int(self._hparams.hidden_size //
                            self._hparams.num_blocks)
            block_v_size = 2**(z_size_per_residual / self._hparams.num_blocks)
            block_v_size = int(block_v_size)

            if self._hparams.reshape_method == "project":
                tf.logging.info("Using projections for DVQ")
                tf.logging.info("Trainable projections = {}".format(
                    self._hparams.trainable_projections))

                projection_tensors = tf.get_variable(
                    name="projection",
                    shape=[
                        self._hparams.num_residuals, self._hparams.num_blocks,
                        self._hparams.hidden_size, block_dim
                    ],
                    initializer=tf.initializers.glorot_uniform(),
                    trainable=self._hparams.trainable_projections)

                # Re-wrap the existing partial so the projections are bound.
                self._hparams.bottleneck = functools.partial(
                    self._hparams.bottleneck,
                    projection_tensors=projection_tensors)
            elif self._hparams.reshape_method == "slice":
                tf.logging.info("Using slices for DVQ")
            else:
                raise ValueError("Unknown reshape method")

            # Codebook: one table of block_v_size vectors of size block_dim
            # per (residual, block) pair.
            means = tf.get_variable(
                name="means",
                shape=[
                    self._hparams.num_residuals, self._hparams.num_blocks,
                    block_v_size, block_dim
                ],
                initializer=tf.uniform_unit_scaling_initializer())

            # Create the shadow variables if we are using EMA
            ema_count = None
            ema_means = None
            if self._hparams.ema:
                ema_count = []
                for i in range(self._hparams.num_residuals):
                    ema_count_i = tf.get_variable(
                        "ema_count_{}".format(i),
                        [self._hparams.num_blocks, block_v_size],
                        initializer=tf.constant_initializer(0),
                        trainable=False)
                    ema_count.append(ema_count_i)
                with tf.colocate_with(means):
                    ema_means = []
                    for i in range(self._hparams.num_residuals):
                        # Each EMA mean starts from slice i of the codebook's
                        # initial value.
                        # NOTE(review): the lambda closes over the loop
                        # variable i; this is only correct if the initializer
                        # is invoked before the next iteration - confirm.
                        ema_means_i = tf.get_variable(
                            "ema_means_{}".format(i),
                            [
                                self._hparams.num_blocks, block_v_size,
                                block_dim
                            ],
                            initializer=(
                                lambda shape, dtype=None, partition_info=None,  # pylint: disable=g-long-lambda
                                verify_shape=None: means.initialized_value()[i]
                            ),
                            trainable=False)
                        ema_means.append(ema_means_i)

            # Update bottleneck
            self._hparams.bottleneck = functools.partial(
                self._hparams.bottleneck,
                means=means,
                ema_count=ema_count,
                ema_means=ema_means)
コード例 #40
0
 def new_weight(self, shape, name, uniform=False, stddev=0.1):
     """Create (or fetch) a variable called `name` with the given shape.

     With uniform=False the initializer is a random normal with standard
     deviation `stddev`; with uniform=True it is a uniform unit-scaling
     initializer that receives `stddev` as its factor.
     """
     if uniform:
         init_fn = tf.uniform_unit_scaling_initializer(factor=stddev)
     else:
         init_fn = tf.random_normal_initializer(stddev=stddev)
     return tf.get_variable(name=name, shape=shape, initializer=init_fn)
コード例 #41
0
        # Matrix with dimensions (batch_size by maximum question length)
        self.questions_placeholder = tf.placeholder(tf.int32, shape=(None, None))
        # Matrix with dimensions (batch_size by 2) where the second dimension is a binary indicator for each word. 0 represents the score for when word is not part of the answer and 1 represents the score when it is
        # TODO: confirm if this is score in fact or something else
            self.answers_placeholder = tf.placeholder(tf.int32, shape=(None, None))

        # Placeholders for bidirectional lstm
        #self.passage_sequence_lengths = tf.placeholder(tf.int32, [None])
        #self.question_sequence_lengths = tf.placeholder(tf.int32, [None])
        # Create global step counter so we can track and save how many batches
        # we've completed.
        #self.global_step = tf.Variable(0, name='global_step', trainable=False)
        # The ordering of indices in the currently running shuffled batch
        #self.idxs = tf.Variable(tf.zeros(self.size_train_dataset, dtype=tf.int32), \
                                name='idxs', trainable=False)

        # ==== assemble pieces ====
        with tf.variable_scope("qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
        self.preds = self.setup_predictions() # Creates embeddings and prediction
        self.loss = self.setup_loss(self.preds) # Creates loss computation
        self.train_op, self.grad_norm = self.setup_learning(self.loss) # Creates optimizer i.e. updates parameters in model
        
        # Create model saver
        self.saver = tf.train.Saver()






コード例 #42
0
ファイル: visual_models.py プロジェクト: pratikm141/logan
 def _extra_init(self):
     """Run the parent's extra initialisation, then set the weight initializer."""
     super()._extra_init()
     # Earlier alternative, kept for reference:
     # tf.random_uniform_initializer(-np.sqrt(3) * .04, np.sqrt(3) * .04)
     scaling_factor = 1.43
     self.w_init = tf.uniform_unit_scaling_initializer(factor=scaling_factor)
コード例 #43
0
def decode_spectrum(encoded_spectrum, intensity_inputs, decoder_inputs_emb,
                    keep_conv, keep_dense, scope):
    """Build the decoding graph that combines an intensity model with an LSTM.

    The LSTM is first fed `encoded_spectrum` from a zero state to obtain its
    initial state. Then, for every step `i` of `decoder_inputs_emb`, the
    intensity model is applied to `intensity_inputs[i]`, the LSTM is advanced
    on the projected decoder input, and the two are combined into one logit
    tensor according to `deepnovo_config.FLAGS.use_intensity` and
    `deepnovo_config.FLAGS.use_lstm`.

    Args:
      encoded_spectrum: Tensor used as the very first LSTM input; its first
        dimension is taken as the batch size.
      intensity_inputs: Sequence indexed by decoding step; element `i` is the
        candidate intensity tensor for step `i`.
      decoder_inputs_emb: Sequence of embedded decoder inputs, one per step.
        Each element is multiplied by a
        [embedding_size, num_units] projection matrix.
      keep_conv: Keep probability passed to `tf.nn.dropout` after the
        convolution layers (joint-weight mode only).
      keep_dense: Keep probability used for the dense-layer dropouts and for
        the input/output dropout of the LSTM cell.
      scope: Variable scope under which all variables are created.

    Returns:
      A tuple `(outputs, dense1_W_penalty)` where `outputs` is a list with one
      logit tensor per decoding step and `dense1_W_penalty` is the L2 loss of
      the first dense layer's weights scaled by
      `deepnovo_config.l2_loss_weight`.
    """

    # NOTE: `single_cell` is built unconditionally (as in the original code)
    # so that the order in which cell objects are constructed stays the same.
    single_cell = rnn_cell.BasicLSTMCell(num_units=deepnovo_config.num_units,
                                         state_is_tuple=True)
    if deepnovo_config.num_layers > 1:
        # cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * deepnovo_config.num_layers)
        # One distinct cell object per layer.
        stacked_rnn = []
        for _ in range(deepnovo_config.num_layers):
            stacked_rnn.append(
                rnn_cell.BasicLSTMCell(num_units=deepnovo_config.num_units,
                                       state_is_tuple=True))
        cell = rnn_cell.MultiRNNCell(cells=stacked_rnn, state_is_tuple=True)
    else:
        cell = single_cell
    cell = rnn_cell.DropoutWrapper(cell,
                                   input_keep_prob=keep_dense,
                                   output_keep_prob=keep_dense)

    with variable_scope.variable_scope(scope):

        # INTENSITY-Model Parameters
        # intensity input [128, 27, 2, 10]

        if deepnovo_config.FLAGS.shared:  # shared-weight

            dense1_input_size = deepnovo_config.num_ion * deepnovo_config.WINDOW_SIZE
            dense1_output_size = deepnovo_config.num_units * 2  #+deepnovo_config.embedding_size #JOON
            dense1_W = variable_scope.get_variable(
                name="dense1_W_0",
                shape=[dense1_input_size, dense1_output_size],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            dense1_B = variable_scope.get_variable(
                name="dense1_B_0",
                shape=[dense1_output_size],
                initializer=tf.constant_initializer(0.1))

            dense_linear_W = variable_scope.get_variable(
                name="dense_linear_W", shape=[dense1_output_size, 1])
            dense_linear_B = variable_scope.get_variable(
                name="dense_linear_B",
                shape=[1],
                initializer=tf.constant_initializer(0.1))

            # The penalty is part of the return value, so it must exist in
            # this mode as well; previously only the joint-weight branch
            # defined it and the final `return` raised UnboundLocalError.
            # It mirrors the joint-weight branch on this mode's first dense
            # layer weights.
            dense1_W_penalty = tf.multiply(tf.nn.l2_loss(dense1_W),
                                           deepnovo_config.l2_loss_weight,
                                           name='dense1_W_penalty')

        else:  # joint-weight

            # conv1: [128, 8, 20, 26] >> [128, 8, 20, 64] with kernel [1, 3, 26, 64]
            conv1_weights = tf.get_variable(
                name="conv1_weights",
                shape=[1, 3, deepnovo_config.vocab_size, 64],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            conv1_biases = tf.get_variable(
                name="conv1_biases",
                shape=[64],
                initializer=tf.constant_initializer(0.1))

            # conv2: [128, 8, 20, 64] >> [128, 8, 20, 64] with kernel [1, 2, 64, 64]
            conv2_weights = tf.get_variable(
                name="conv2_weights",
                shape=[1, 2, 64, 64],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            conv2_biases = tf.get_variable(
                name="conv2_biases",
                shape=[64],
                initializer=tf.constant_initializer(0.1))

            # max_pool: [128, 8, 20, 64] >> [128, 8, 10, 64]

            # dense1: # 4D >> [128, 512]
            dense1_input_size = deepnovo_config.num_ion * (
                deepnovo_config.WINDOW_SIZE //
                2) * 64  # deepnovo_config.vocab_size
            dense1_output_size = deepnovo_config.num_units  #JOON
            dense1_weights = tf.get_variable(
                "dense1_weights",
                shape=[dense1_input_size, dense1_output_size],
                initializer=tf.uniform_unit_scaling_initializer(1.43))
            dense1_biases = tf.get_variable(
                "dense1_biases",
                shape=[dense1_output_size],
                initializer=tf.constant_initializer(0.1))

            # for testing
            dense1_W_penalty = tf.multiply(tf.nn.l2_loss(dense1_weights),
                                           deepnovo_config.l2_loss_weight,
                                           name='dense1_W_penalty')

        # cat
        dense_concat_W = variable_scope.get_variable(
            name="dense_concat_W",
            # shape=[deepnovo_config.num_units+deepnovo_config.embedding_size, deepnovo_config.num_units],#JOON?
            shape=[deepnovo_config.num_units * 2,
                   deepnovo_config.num_units],  #JOON?
            initializer=tf.uniform_unit_scaling_initializer(1.43))
        dense_concat_B = variable_scope.get_variable(
            name="dense_concat_B",
            shape=[deepnovo_config.num_units],  #JOON
            initializer=tf.constant_initializer(0.1))

        # DECODING - SPECTRUM as Input 0
        # Run the cell once on the encoded spectrum to get the initial state.
        with variable_scope.variable_scope("LSTM_cell"):
            input0 = encoded_spectrum
            print('input0 = encoded_spectrum:', encoded_spectrum)
            batch_size = array_ops.shape(input0)[0]
            zero_state = cell.zero_state(batch_size=batch_size,
                                         dtype=tf.float32)
            _, lstm_state_0 = cell(inputs=input0, state=zero_state)

        # nobi
        # DECODING - lstm_input_projected
        with variable_scope.variable_scope("LSTM_input_projected"):
            lstm_input_projected_W = variable_scope.get_variable(
                name="lstm_input_projected_W",
                shape=[
                    deepnovo_config.embedding_size, deepnovo_config.num_units
                ])
            lstm_input_projected_B = variable_scope.get_variable(
                name="lstm_input_projected_B",
                shape=[deepnovo_config.num_units],
                initializer=tf.constant_initializer(0.1))

        # DECODING LOOP
        # nobi
        outputs = []
        AA_1 = decoder_inputs_emb[0]  # padding [AA_1, AA_2, ?] with GO/EOS
        # ltsm.len_full
        lstm_state = lstm_state_0

        for i, AA_2 in enumerate(decoder_inputs_emb):

            # nobi
            # Variables created by `_linear` below are made on the first
            # step and reused on every later step.
            if i > 0:  # to-do-later: bring variable definitions out of the loop
                variable_scope.get_variable_scope().reuse_variables()

            # INTENSITY-Model
            candidate_intensity = intensity_inputs[i]  # [128, 27, 2, 10]

            if deepnovo_config.FLAGS.shared:  # shared-weight

                candidate_intensity_reshape = tf.reshape(
                    candidate_intensity,
                    shape=[-1, dense1_input_size])  # [128*27, 2*10]

                layer_dense1_input = candidate_intensity_reshape
                layer_dense1 = tf.nn.relu(
                    tf.matmul(layer_dense1_input, dense1_W) +
                    dense1_B)  # [128*27, 1024]
                layer_dense1_drop = tf.nn.dropout(layer_dense1, keep_dense)
                layer_dense1_output = (
                    tf.matmul(layer_dense1_drop, dense_linear_W) +
                    dense_linear_B)  # [128*27,1]

                # Intensity output
                intensity_output = tf.reshape(
                    layer_dense1_output,
                    shape=[-1, deepnovo_config.vocab_size])  # [128,27]

                # NOTE(review): `intensity_output_projected` is never
                # assigned in shared-weight mode, so the intensity-only
                # output path below fails with a NameError in this mode.
                # The intended value is not clear from this function --
                # confirm before relying on shared + intensity-only.

            else:  # joint-weight

                # image_batch: [128, 26, 8, 20] >> [128, 8, 20, 26]
                # This is a bug, should be fixed at the input processing later.
                image_batch = tf.transpose(candidate_intensity,
                                           perm=[0, 2, 3, 1])  # [128,8,20,26]

                # conv1: [128, 8, 20, 26] >> [128, 8, 20, 64] with kernel [1, 3, 26, 64]
                conv1 = tf.nn.relu(
                    tf.nn.conv2d(image_batch,
                                 conv1_weights,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME') + conv1_biases)

                # conv2: [128, 8, 20, 64] >> [128, 8, 20, 64] with kernel [1, 2, 64, 64]
                conv2 = tf.nn.relu(
                    tf.nn.conv2d(conv1,
                                 conv2_weights,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME') + conv2_biases)
                conv2 = tf.nn.max_pool(conv2,
                                       ksize=[1, 1, 3, 1],
                                       strides=[1, 1, 2, 1],
                                       padding='SAME')  # [128, 8, 10, 64]
                conv2 = tf.nn.dropout(conv2, keep_conv)

                # dense1: 4D >> [128, 512]
                dense1_input = tf.reshape(
                    conv2, [-1, dense1_input_size])  # 2D flatten
                dense1 = tf.nn.relu(
                    tf.matmul(dense1_input, dense1_weights) +
                    dense1_biases)  # [128, 512]

                # dense2: # [128, 512] >> [128, 512]
                #~ dense2 = tf.nn.relu(tf.matmul(dense1, dense2_weights) + dense2_biases) # [128, 512]

                #~ dropout1 = tf.nn.dropout(dense2, keep_dense, name="dropout1")
                dropout1 = tf.nn.dropout(dense1, keep_dense, name="dropout1")

                # logit_linear: [128, 512] >> [128, 27]
                #~ intensity_output = tf.add(tf.matmul(dropout1, linear_weights),
                #~ linear_biases) # [128, 27]
                intensity_output = dropout1
                with variable_scope.variable_scope(
                        "intensity_output_projected"):
                    intensity_output_projected = rnn_cell_impl._linear(  # TODO(nh2tran): _linear
                        args=intensity_output,
                        output_size=deepnovo_config.vocab_size,  # [128,27]
                        bias=True,
                        bias_initializer=None,  #0.1,
                        kernel_initializer=None)

            # nobi
            # LSTM-Model
            # AA_1_projected is not consumed below; it is kept so the set of
            # ops added to the graph is unchanged.
            AA_1_projected = (tf.matmul(AA_1, lstm_input_projected_W) +
                              lstm_input_projected_B)
            AA_2_projected = (tf.matmul(AA_2, lstm_input_projected_W) +
                              lstm_input_projected_B)

            with variable_scope.variable_scope("LSTM_cell"):

                # The cell's variables were created when the spectrum was fed
                # in above, so every step here reuses them.
                variable_scope.get_variable_scope().reuse_variables()

                # print('cell:', cell)
                # print('AA_2_projected:', AA_2_projected)
                # print('lstm_state:', lstm_state)
                lstm_output, lstm_state = cell(inputs=AA_2_projected,
                                               state=lstm_state)

                AA_1 = AA_2

            with variable_scope.variable_scope("lstm_output_projected"):
                lstm_output_projected = rnn_cell_impl._linear(  # TODO(nh2tran): _linear
                    args=lstm_output,
                    output_size=deepnovo_config.vocab_size,  # [128,27]
                    bias=True,
                    bias_initializer=None,  #0.1,
                    kernel_initializer=None)

            # LSTM-Intensity Connection-Model >> OUTPUT
            if deepnovo_config.FLAGS.use_intensity and deepnovo_config.FLAGS.use_lstm:

                #~ output_logit = tf.nn.relu(tf.matmul(lstm_output_projected, denseL_W)
                #~ + tf.matmul(intensity_output_projected, denseI_W)
                #~ + denseC_B)

                # cat
                concat = tf.concat(axis=1,
                                   values=[intensity_output, lstm_output])
                concat_dense = tf.nn.relu(
                    tf.matmul(concat, dense_concat_W) + dense_concat_B)
                concat_drop = tf.nn.dropout(concat_dense, keep_dense)

                with variable_scope.variable_scope("output_logit"):
                    output_logit = rnn_cell_impl._linear(
                        args=concat_drop,  # TODO(nh2tran): _linear
                        output_size=deepnovo_config.vocab_size,  # [128,27]
                        bias=True,
                        bias_initializer=None,  #0.1,
                        kernel_initializer=None)

            elif deepnovo_config.FLAGS.use_intensity:
                # intensity only (without LSTM >> up to 10% loss, especially at AA-accuracy?)
                output_logit = intensity_output_projected

            elif deepnovo_config.FLAGS.use_lstm:
                output_logit = lstm_output_projected

            else:
                print("ERROR: wrong LSTM-Intensity model specified!")
                sys.exit()

            outputs.append(output_logit)

    return (outputs, dense1_W_penalty)
コード例 #44
0
ファイル: MadNet.py プロジェクト: shairoz/MadNet
def resnet_v1_siamese(input_shape,
                      depth,
                      num_classes=10,
                      weight_decay=0.0,
                      embedding_activation='leaky-relu',
                      embedding_aux_loss='cosine',
                      reduce_variance=False,
                      reduce_jacobian_loss=False,
                      load_weights='',
                      reduce_juccobian_coeff=0.01):
    '''
    A two-input (siamese) ResNet model with the different MAD loss components.

    Both inputs go through the same layer objects, so the two branches share
    their weights.

    :param input_shape: shape of one input sample (passed to ``Input``)
    :param depth: must be 6n+2 (32, 56, ...)
    :param num_classes: number of classes
    :param weight_decay: decay to use for an l2 regularization
    :param embedding_activation: activation used around the embedding layers;
        'leaky-relu' selects ``LeakyReLU(alpha=0.1)``, any other value is
        passed to ``Activation``
    :param embedding_aux_loss: auxiliary siamese output, either 'cosine'
        (normalized dot product of the two embeddings) or 'margin' (the two
        embeddings stacked along a new last axis)
    :param reduce_variance: bool, if True the two embeddings are appended to
        the model outputs (for the reduce-variance loss)
    :param reduce_jacobian_loss: bool, if True the scaled input-gradient norms
        are appended to the model outputs (for the reduce-Jacobian loss)
    :param load_weights: path to pretrained model to load, must be with
        identical configuration
    :param reduce_juccobian_coeff: scale applied to the Jacobian outputs
    :return: the assembled ``Model`` with inputs ``[in1, in2]``
    :raises ValueError: if ``depth`` is not 6n+2 or ``embedding_aux_loss`` is
        not one of the supported values
    '''

    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Fail early with a clear message; otherwise `aux_out` below would never
    # be assigned and the function would die with an UnboundLocalError after
    # the whole network had already been built.
    if embedding_aux_loss not in ('cosine', 'margin'):
        raise ValueError(
            "embedding_aux_loss should be 'cosine' or 'margin', got %r" %
            (embedding_aux_loss, ))
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    in1 = Input(shape=input_shape)
    in2 = Input(shape=input_shape)
    conv_layer_list = []

    x1, x2, conv_layer_list = resnet_layer_siamese(
        in1,
        in2,
        activation=embedding_activation,
        conv_first=True,
        weight_decay=weight_decay,
        conv_layer_list=conv_layer_list)
    # Instantiate the stack of residual units
    first_iter = True
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            if not first_iter:
                # The same BN/activation objects are applied to both branches
                # so that the two branches stay weight-tied.
                bn = BatchNormalization(momentum=0.9)
                if embedding_activation == 'leaky-relu':

                    lr = LeakyReLU(alpha=0.1)
                else:
                    lr = Activation(embedding_activation)
                y1 = bn(x1)
                y1 = lr(y1)
                y2 = bn(x2)
                y2 = lr(y2)
            else:
                y1 = x1
                y2 = x2
            y1, y2, conv_layer_list = resnet_layer_siamese(
                y1,
                y2,
                num_filters=num_filters,
                strides=strides,
                activation=embedding_activation,
                conv_first=True,
                weight_decay=weight_decay,
                conv_layer_list=conv_layer_list)
            y1, y2, conv_layer_list = resnet_layer_siamese(
                y1,
                y2,
                num_filters=num_filters,
                activation=None,
                conv_first=True,
                weight_decay=weight_decay,
                batch_normalization=False,
                conv_layer_list=conv_layer_list)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # Shortcut path: halve the spatial size and pad the channels
                # so it can be added to the residual branch.
                ap = AveragePooling2D(2, 2, 'valid')
                x1 = ap(x1)
                x2 = ap(x2)

                x1 = Lambda(pad_depth,
                            arguments={'desired_channels': y1.shape[-1]})(x1)
                x2 = Lambda(pad_depth,
                            arguments={'desired_channels': y2.shape[-1]})(x2)

            x1 = keras.layers.add([x1, y1])
            x2 = keras.layers.add([x2, y2])

            first_iter = False

        num_filters *= 2

    bn = BatchNormalization(momentum=0.9)
    x1 = bn(x1)
    x2 = bn(x2)
    if embedding_activation == 'leaky-relu':
        lr = LeakyReLU(alpha=0.1)
    elif embedding_activation == 'tanh':
        print("Using tanh activation")
        lr = Activation('tanh')
    else:
        # Previously this case silently reused whatever `lr` the loop above
        # had left behind (or failed with a NameError when the loop never
        # created one); build the layer explicitly instead.
        lr = Activation(embedding_activation)

    x1 = lr(x1)
    x2 = lr(x2)
    ap = AveragePooling2D(pool_size=int(input_shape[0] / 4), name='bottleneck')
    x1 = ap(x1)
    x2 = ap(x2)

    emb1 = Flatten()(x1)
    emb2 = Flatten()(x2)

    dense = Dense(
        num_classes,
        name='logits',
        kernel_initializer=tf.uniform_unit_scaling_initializer(factor=1.0),
        kernel_regularizer=l2(weight_decay),
        bias_initializer=tf.constant_initializer())

    logits1 = dense(emb1)
    logits2 = dense(emb2)

    output1 = Activation('softmax', name='main_output1')(logits1)
    output2 = Activation('softmax', name='main_output2')(logits2)
    if embedding_aux_loss == 'cosine':
        aux_out = Dot(1, normalize=True)([emb1, emb2])
    else:  # 'margin' (validated at the top of the function)
        print("using margin loss")
        aux_out = Lambda(lambda l: K.concatenate(
            (K.expand_dims(l[0], axis=-1), K.expand_dims(l[1], axis=-1)),
            axis=-1))([emb1, emb2])

    output_list = [output1, output2, aux_out]
    if reduce_variance:
        output_list += [emb1, emb2]
    if reduce_jacobian_loss:
        # Scaled L2 norm of d(output)/d(input), reduced over axes (1, 2, 3).
        jacobian_output1 = Lambda(lambda l: reduce_juccobian_coeff * K.sqrt(
            K.sum(K.pow(K.gradients(output1, l)[0], 2), axis=(1, 2, 3))),
                                  output_shape=[1])(in1)
        jacobian_output2 = Lambda(lambda l: reduce_juccobian_coeff * K.sqrt(
            K.sum(K.pow(K.gradients(output2, l)[0], 2), axis=(1, 2, 3))),
                                  output_shape=[1])(in2)

        output_list += [jacobian_output1, jacobian_output2]

    model = Model(inputs=[in1, in2], outputs=output_list)

    if load_weights != '':  #loading weights
        # Load the pretrained weights through single-branch sub-models that
        # end at each softmax output.
        temp_model = keras.Model(model.input[0],
                                 model.get_layer('main_output1').output)
        temp_model.load_weights(load_weights)
        temp_model = keras.Model(model.input[1],
                                 model.get_layer('main_output2').output)
        temp_model.load_weights(load_weights)

    return model
コード例 #45
0
    def __init__(self, pretrained_embeddings, flags):
        """
        Build the QA system graph: sub-modules, placeholders, loss and
        training op.

        :param pretrained_embeddings: stored on the instance as
            ``self.pretrained_embeddings``
        :param flags: configuration object; the attributes read here are
            ``state_size``, ``output_size``, ``question_size``,
            ``embedding_size``, ``dropout``, ``learning_rate``, ``grad_clip``
            and (only when clipping) ``max_gradient_norm``
        """
        self.pretrained_embeddings = pretrained_embeddings
        self.flags = flags

        # Sizes and hyper-parameters copied from the flags.
        self.h_size = flags.state_size
        self.p_size = flags.output_size
        self.q_size = flags.question_size
        self.embed_size = flags.embedding_size
        self.dropout = flags.dropout

        # Both sub-modules receive 1 - dropout.
        self.encoder = Encoder(hidden_size=self.h_size,
                               dropout=(1.0 - flags.dropout))
        self.decoder = Decoder(hidden_size=self.h_size,
                               output_size=self.p_size,
                               dropout=(1.0 - flags.dropout))

        # ---- placeholders ----
        self.context_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.p_size), name='context_placeholder')
        self.question_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.q_size), name='question_placeholder')
        self.answer_span_placeholder = tf.placeholder(
            tf.int32, shape=(None, 2), name='answer_span_placeholder')
        self.mask_q_placeholder = tf.placeholder(
            tf.int32, shape=(None, ), name='mask_q_placeholder')
        self.mask_ctx_placeholder = tf.placeholder(
            tf.int32, shape=(None, ), name='mask_ctx_placeholder')
        self.dropout_placeholder = tf.placeholder(
            tf.float32, shape=(), name='dropout_placeholder')

        # ---- model pieces, all created inside the "qa" variable scope ----
        qa_initializer = tf.uniform_unit_scaling_initializer(1.0)
        with tf.variable_scope("qa", initializer=qa_initializer):
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()

        # ---- training op ----
        self.global_step = tf.Variable(0, trainable=False)
        self.starter_learning_rate = self.flags.learning_rate
        # Constant learning rate; an exponential decay schedule
        # (tf.train.exponential_decay, 1000 steps, rate 0.96, staircase) was
        # tried here and is currently disabled.
        self.learning_rate = self.starter_learning_rate

        make_optimizer = get_optimizer("adam")

        if self.flags.grad_clip:
            # Clip every available gradient by norm before applying it.
            self.optimizer = make_optimizer(self.learning_rate)
            clipped_pairs = [
                (gradient if gradient is None else tf.clip_by_norm(
                    gradient, self.flags.max_gradient_norm), variable)
                for gradient, variable in self.optimizer.compute_gradients(
                    self.loss)
            ]
            self.train_op = self.optimizer.apply_gradients(
                clipped_pairs, global_step=self.global_step)
        else:
            # Without clipping, `self.optimizer` keeps the value returned by
            # get_optimizer and a fresh optimizer instance does the minimize.
            self.optimizer = make_optimizer
            self.train_op = make_optimizer(self.learning_rate).minimize(
                self.loss, global_step=self.global_step)

        self.saver = tf.train.Saver()
コード例 #46
0
# Script fragment: builds the recurrent cell(s), the initializers and the
# input/target/state placeholders and filter variables for the model.
# NOTE(review): `input` and `filter` below shadow the Python builtins of the
# same name, and `xrange` only exists on Python 2.

# cell_type = ThetaRNNCell

# Stack several cells when more than one layer is configured; the network
# output size is the state size of the last (or only) cell.
if c.num_of_layers > 1:
    cells = rc.MultiRNNCell(
        [cell_type(c.net_size) for _ in xrange(c.num_of_layers)])
    net_out_size = cells._cells[-1].state_size
else:
    cells = cell_type(c.net_size)
    net_out_size = cells.state_size

forecast_steps = c.forecast_ms

# Initializer for the filter variable "W": calls generate_dct_dictionary with
# the first and last entries of the requested shape and reshapes the result to
# (shape[0], 1, 1, shape[3]). The commented-out line is an earlier uniform
# random alternative.
#init = lambda shape, dtype: np.reshape(-np.sqrt(3) / np.sqrt(shape[0]) + np.random.random((shape[0], shape[3])) * 2.0*np.sqrt(3) / np.sqrt(shape[0]), (shape[0], 1, 1, shape[3]))
init = lambda shape, dtype: generate_dct_dictionary(shape[0], shape[
    3]).reshape(shape[0], 1, 1, shape[3])
# Initializer for the recovery filter "Wr".
recov_init = tf.uniform_unit_scaling_initializer(factor=1.0)

# Input and target placeholders share the shape (1, c.seq_size, 1, 1).
input = tf.placeholder(tf.float32, shape=(1, c.seq_size, 1, 1), name="Input")
target = tf.placeholder(tf.float32, shape=(1, c.seq_size, 1, 1), name="Target")

filter = vs.get_variable("W", [c.filter_len, 1, 1, c.filters_num],
                         initializer=init)
# The bias initializer ignores its (shape, dtype) arguments and always returns
# zeros of length c.filters_num.
bias = vs.get_variable(
    "b", [c.filters_num],
    initializer=lambda shape, dtype: np.zeros(c.filters_num))
recov_filter = vs.get_variable("Wr", [c.filter_len, 1, 1, net_out_size],
                               initializer=recov_init)

# Placeholder for the recurrent state, shaped (c.batch_size, cells.state_size).
state = tf.placeholder(tf.float32,
                       shape=(c.batch_size, cells.state_size),
                       name="State")
コード例 #47
0
 def testDuplicatedInitializer(self):
     """Check that duplicated_initializer reports False for the default
     uniform unit-scaling initializer, both with and without the GPU flag."""
     for on_gpu in (False, True):
         # A fresh initializer is built for every device setting.
         initializer = tf.uniform_unit_scaling_initializer()
         is_duplicated = duplicated_initializer(self, initializer, on_gpu, 1)
         self.assertFalse(is_duplicated)
コード例 #48
0
    def __init__(self, encoder, decoder, rev_vocab, args):
        """
        Set up hyper-parameters, placeholders and the training graph.

        :param encoder: encoder object, stored as ``self.encoder``
        :param decoder: decoder object, stored as ``self.decoder``
        :param rev_vocab: stored as ``self.rev_vocab``
        :param args: object whose attributes supply the hyper-parameters
            copied onto the instance below
        """

        # ---- hyper-parameters ----
        # These two are stored under a different name than in `args`.
        self.max_length_passage = args.max_passage_length
        self.max_length_question = args.max_question_length
        # The rest keep their `args` name.
        for option in ("embedding_size", "embed_path", "learning_rate",
                       "epochs", "start_epoch", "batch_size",
                       "max_gradient_norm", "train_dir", "saved_name",
                       "eval_num_samples", "val_and_save_num_batches",
                       "val_cost_frac", "size_train_dataset",
                       "sigma_threshold"):
            setattr(self, option, getattr(args, option))

        # ---- collaborators ----
        self.encoder = encoder
        self.decoder = decoder
        self.rev_vocab = rev_vocab

        # ---- data ----
        # Word embeddings are read from the 'glove' entry of the file at
        # embed_path.
        embedding_archive = np.load(self.embed_path)
        self.pretrained_embeddings = embedding_archive['glove']

        # ---- placeholders ----
        # NOTE: these placeholders are unnamed, so their creation order must
        # stay as it is.
        matrix_shape = (None, None)

        # Passages: batch_size x maximum passage length.
        self.passages_placeholder = tf.placeholder(tf.int32,
                                                   shape=matrix_shape)
        # Questions: batch_size x maximum question length.
        self.questions_placeholder = tf.placeholder(tf.int32,
                                                    shape=matrix_shape)
        # Answers: batch_size x per-word binary indicator
        # (0 = word is not part of the answer, 1 = it is).
        self.answers_placeholder = tf.placeholder(tf.int32,
                                                  shape=matrix_shape)
        # Boolean masks for passages and questions.
        self.mask_passage_placeholder = tf.placeholder(tf.bool,
                                                       shape=matrix_shape)
        self.mask_question_placeholder = tf.placeholder(tf.bool,
                                                        shape=matrix_shape)
        # Per-example sequence lengths for the bidirectional LSTM.
        # TODO: Question: is there a better way of doing this??
        self.passage_sequence_lengths = tf.placeholder(tf.int32, [None])
        self.question_sequence_lengths = tf.placeholder(tf.int32, [None])

        # Counter of completed batches, so progress can be tracked and saved.
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Ordering of indices in the currently running shuffled batch.
        initial_order = tf.zeros(self.size_train_dataset, dtype=tf.int32)
        self.idxs = tf.Variable(initial_order, name='idxs', trainable=False)

        # ---- model pieces, all created inside the "qa" variable scope ----
        qa_initializer = tf.uniform_unit_scaling_initializer(1.0)
        with tf.variable_scope("qa", initializer=qa_initializer):
            # Embeddings and predictions.
            self.preds = self.setup_predictions()
            # Loss computation.
            self.loss = self.setup_loss(self.preds)
            # Optimizer, i.e. the op that updates the model parameters.
            learning_ops = self.setup_learning(self.loss)
            self.train_op, self.grad_norm, self.new_grad_norm = learning_ops

        # Model saver.
        self.saver = tf.train.Saver()
コード例 #49
0
def initialize(sess):
  """Prepare the data, build the NeuralGPU model and restore checkpoints.

  Args:
    sess: session used to run the variable initializer and to restore the
      model from a checkpoint when one exists.

  Returns:
    A tuple (model, min_length, max_length, checkpoint_dir, curriculum,
    ensemble), where `ensemble` is the list of checkpoint paths found in the
    directories named by FLAGS.ensemble.
  """
  job_id = FLAGS.jobid
  if job_id >= 0:
    data.log_filename = os.path.join(FLAGS.train_dir, "log%d" % job_id)
  data.print_out("NN ", newline=False)

  # Seed every random number generator with the same job-dependent value.
  rng_seed = FLAGS.random_seed + max(0, FLAGS.jobid)
  tf.set_random_seed(rng_seed)
  random.seed(rng_seed)
  np.random.seed(rng_seed)

  # Sanity-check the bins and drop trailing ones that are not needed.
  assert data.bins
  min_length = 3
  max_length = min(FLAGS.max_length, data.bins[-1])
  assert max_length + 1 > min_length
  while len(data.bins) > 1 and data.bins[-2] > max_length + EXTRA_EVAL:
    data.bins = data.bins[:-1]
  assert data.bins[0] > FLAGS.rx_step
  data.forward_max = max(FLAGS.forward_max, data.bins[-1])
  nclass = min(FLAGS.niclass, FLAGS.noclass)
  if FLAGS.mode == 0:
    data_size = FLAGS.train_data_size
  else:
    data_size = 1000

  # Generate data for every task: all short lengths, the two largest bins,
  # and the forward_max length.
  end_size = 4 * 1024 if FLAGS.mode > 0 else 1024
  for task in FLAGS.task.split("-"):
    for length in xrange(max_length + EXTRA_EVAL - 1):
      data.init_data(task, length, data_size, nclass)
    for bin_length in (data.bins[-2], data.bins[-1]):
      data.init_data(task, bin_length, data_size, nclass)
    data.init_data(task, data.forward_max, end_size, nclass)

  # Log the run parameters as one line.
  curriculum = FLAGS.curriculum_bound
  model_values = (FLAGS.nconvs, FLAGS.kw, FLAGS.height, FLAGS.kh,
                  FLAGS.rx_step, FLAGS.batch_size, FLAGS.grad_noise_scale,
                  FLAGS.task)
  msg1 = ("layers %d kw %d h %d kh %d relax %d batch %d noise %.2f task %s"
          % model_values)
  msg2 = "data %d %s" % (FLAGS.train_data_size, msg1)
  training_values = (FLAGS.cutoff, FLAGS.pull_incr, FLAGS.lr,
                     FLAGS.init_weight, curriculum, FLAGS.nmaps,
                     FLAGS.dropout, FLAGS.max_grad_norm, msg2)
  msg3 = ("cut %.2f pull %.3f lr %.2f iw %.2f cr %.2f nm %d d%.4f gn %.2f %s"
          % training_values)
  data.print_out(msg3)

  # The checkpoint directory is suffixed with the job id when there is one.
  job_suffix = "" if FLAGS.jobid < 0 else str(FLAGS.jobid)
  checkpoint_dir = os.path.join(FLAGS.train_dir, "neural_gpu%s" % job_suffix)
  if not gfile.IsDirectory(checkpoint_dir):
    data.print_out("Creating checkpoint directory %s." % checkpoint_dir)
    gfile.MkDir(checkpoint_dir)

  # Build the model under a scope-wide default initializer, then initialize.
  default_init = tf.uniform_unit_scaling_initializer(
      factor=1.8 * FLAGS.init_weight)
  tf.get_variable_scope().set_initializer(default_init)
  model = neural_gpu.NeuralGPU(
      FLAGS.nmaps, FLAGS.nmaps, FLAGS.niclass, FLAGS.noclass, FLAGS.dropout,
      FLAGS.rx_step, FLAGS.max_grad_norm, FLAGS.cutoff, FLAGS.nconvs,
      FLAGS.kw, FLAGS.kh, FLAGS.height, FLAGS.mode, FLAGS.lr,
      FLAGS.iw_batches,
      FLAGS.pull, FLAGS.pull_incr, min_length + 3)
  data.print_out("Created model.")
  sess.run(tf.initialize_all_variables())
  data.print_out("Initialized variables.")

  # Restore saved parameters when a checkpoint is present.
  saved = tf.train.get_checkpoint_state(checkpoint_dir)
  if saved and gfile.Exists(saved.model_checkpoint_path):
    data.print_out("Reading model parameters from %s"
                   % saved.model_checkpoint_path)
    model.saver.restore(sess, saved.model_checkpoint_path)

  # Collect the checkpoints of any ensemble models (empty names are skipped).
  ensemble = []
  for ensemble_dir in [d for d in FLAGS.ensemble.split(",") if d]:
    member = tf.train.get_checkpoint_state(ensemble_dir)
    if member and gfile.Exists(member.model_checkpoint_path):
      data.print_out("Found ensemble model %s" % member.model_checkpoint_path)
      ensemble.append(member.model_checkpoint_path)

  return (model, min_length, max_length, checkpoint_dir, curriculum, ensemble)
コード例 #50
0
ファイル: nn_ops.py プロジェクト: 2012060010010/DL_code
def lstm_layer(inp,
               length=None,
               state=None,
               memory=None,
               num_nodes=None,
               backward=False,
               clip=50.0,
               reg_func=tf.nn.l2_loss,
               weight_reg=False,
               weight_collection="LSTMWeights",
               bias_reg=False,
               stddev=None,
               seed=None,
               decode=False,
               use_native_weights=False,
               name=None):
  """Builds the ops of a single-direction LSTM layer.

  Data flow:

    input => (forward-LSTM|backward-LSTM) => output

  `backward` picks the direction: the forward LSTM when it is false, the
  backward LSTM otherwise.

  Args:
    inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`].
    length: A 1-D int64 tensor of shape [`batch_size`] holding the length of
            each sequence in `inp`. When omitted, whole sequences are
            reversed for the backward direction.
    state: Optional initial state; zeros are used when it is None.
    memory: Optional initial memory; zeros are used when it is None.
    num_nodes: The number of LSTM cells.
    backward: If true, `inp` is reversed in time before the LSTM and the
              output is reversed back so it lines up with `inp`.
    clip: Value used to clip the cell values (forwarded to
          `rnn.variable_lstm`).
    reg_func: Regularization function applied to the weights, e.g.
              `tf.nn.l2_loss`.
    weight_reg: If true, the weight matrices are regularized with `reg_func`.
    weight_collection: Graph collection that receives the regularization
                       terms.
    bias_reg: If true, the bias vector is regularized with `reg_func`.
    stddev: Standard deviation of the truncated-normal initializer. When it
            is falsy, a uniform unit-scaling initializer is used instead.
    seed: Seed passed to the variable initializer.
    decode: If true, ops that are only needed for training (the
            regularization terms) are not added.
    use_native_weights: If true, stores the weights as one fused "W_0"
                        matrix and a "B" bias under the "LSTMCell" scope,
                        the format of the native implementations.
    name: Name of the variable scope wrapping the layer.

  Returns:
    A tuple `(out, mem)`. `out` is a 3-D tensor of shape
    [`batch_size`, `max_length`, `num_nodes`]; `mem` is the third value
    returned by `rnn.variable_lstm` (presumably the final memory -- confirm
    against that helper).
  """

  def _flip_time(tensor):
    # Reverse along dim 1. With `length`, only the valid prefix of each
    # sequence is reversed.
    if length is None:
      return tf.reverse(tensor, [False, True, False])
    return tf.reverse_sequence(tensor, length, 1, 0)

  with tf.variable_scope(name):
    if backward:
      inp = _flip_time(inp)

    num_prev = inp.get_shape()[2]
    initializer = (
        tf.truncated_normal_initializer(stddev=stddev, seed=seed)
        if stddev else tf.uniform_unit_scaling_initializer(seed=seed))
    # Width of the four concatenated gate pre-activations.
    gate_width = 4 * num_nodes

    if use_native_weights:
      # One fused matrix: the first `num_prev` rows act on the input, the
      # remaining `num_nodes` rows act on the recurrent state.
      with tf.variable_scope("LSTMCell"):
        fused = tf.get_variable(
            "W_0",
            shape=[num_prev + num_nodes, gate_width],
            initializer=initializer,
            dtype=tf.float32)
        w_i_m = tf.slice(fused, [0, 0], [num_prev, gate_width], name="w_i_m")
        recurrent_rows = tf.slice(
            fused, [num_prev, 0], [num_nodes, gate_width])
        w_m_m = tf.reshape(
            recurrent_rows, [num_nodes, 4, num_nodes], name="w_m_m")
    else:
      w_i_m = tf.get_variable(
          "w_i_m", [num_prev, gate_width], initializer=initializer)
      w_m_m = tf.get_variable(
          "w_m_m", [num_nodes, 4, num_nodes], initializer=initializer)

    if not decode and weight_reg:
      for weights, reg_name in ((w_i_m, "w_i_m_reg"), (w_m_m, "w_m_m_reg")):
        tf.add_to_collection(
            weight_collection, reg_func(weights, name=reg_name))

    # Flatten batch and time so the input projection is a single matmul.
    batch_size = shapes.tensor_dim(inp, dim=0)
    num_frames = shapes.tensor_dim(inp, dim=1)
    flat_inp = tf.reshape(inp, tf.pack([batch_size * num_frames, num_prev]))

    if use_native_weights:
      with tf.variable_scope("LSTMCell"):
        native_bias = tf.get_variable(
            "B",
            shape=[gate_width],
            initializer=tf.zeros_initializer,
            dtype=tf.float32)
      biases = tf.identity(native_bias, name="biases")
    else:
      biases = tf.get_variable(
          "biases", [gate_width], initializer=tf.constant_initializer(0.0))
    if not decode and bias_reg:
      tf.add_to_collection(
          weight_collection, reg_func(biases, name="biases_reg"))

    projected = tf.nn.xw_plus_b(flat_inp, w_i_m, biases)
    # Restore the batch/time axes and split the gate axis out.
    gates = tf.reshape(
        projected, tf.pack([batch_size, num_frames, 4, num_nodes]))

    if state is None:
      state = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)
    if memory is None:
      memory = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)

    out, _, mem = rnn.variable_lstm(gates, state, memory, w_m_m, clip=clip)

    if backward:
      # Undo the input reversal so `out` is aligned with the caller's `inp`.
      out = _flip_time(out)

  return out, mem
コード例 #51
0
    def _create_inference(self, item_input, is_reuse):
        """Build the rating-prediction graph for a batch of items.

        Two signals are combined: an element-wise product of the user and
        item embeddings ('global_module'), and an attention-weighted
        interaction between the aspect histories of the user and the item
        ('aspect_module'). Their concatenation is multiplied by
        `self.all_weights['W_out']`.

        Args:
            item_input: Item ids used to index the item tables (presumably
                an integer tensor aligned with `self.user_input` -- confirm
                against the caller).
            is_reuse: `reuse` flag passed to the dense layers created here.
                The item-side aspect projection always reuses the
                user-side kernel regardless of this flag.

        Returns:
            The tensor produced by the final `tf.matmul` named 'prediction'.
        """
        tables = self.all_weights

        def _project_aspects(aspect_embs, reuse):
            # Both histories go through the same 'aspect_embed_trans' kernel.
            return tf.layers.dense(
                aspect_embs,
                units=self.num_aspect_factor,
                name='aspect_embed_trans',
                kernel_initializer=tf.uniform_unit_scaling_initializer(
                    factor=1.0),
                use_bias=False,
                reuse=reuse)

        def _attention_logits(features, layer_name):
            # Single-unit dense layer producing un-normalised scores.
            return tf.layers.dense(features,
                                   units=1,
                                   name=layer_name,
                                   reuse=is_reuse)

        with tf.name_scope('global_module'):
            user_vec = tf.nn.embedding_lookup(tables['user_embed'],
                                              self.user_input)
            item_vec = tf.nn.embedding_lookup(tables['item_embed'],
                                              item_input)
            mf_interact = tf.nn.dropout(tf.multiply(user_vec, item_vec),
                                        keep_prob=self.dropout_keep)

        with tf.name_scope('aspect_module'):
            # Aspect-id histories of the user and of the item.
            user_history = tf.nn.embedding_lookup(
                tables['user_history_aspect'], self.user_input, name='u_hist')
            item_history = tf.nn.embedding_lookup(
                tables['item_history_aspect'], item_input, name='v_hist')

            user_aspects = tf.nn.embedding_lookup(
                tables['aspect_embed'], user_history, name='u_hist_a_embs')
            item_aspects = tf.nn.embedding_lookup(
                tables['aspect_embed'], item_history, name='v_hist_a_embs')

            user_aspects = _project_aspects(user_aspects, is_reuse)
            item_aspects = _project_aspects(item_aspects, True)

            # Multiply by the mask table entries looked up for each history
            # id (presumably zeroing padding slots -- confirm table contents).
            user_mask = tf.nn.embedding_lookup(tables['mask_lookup_table'],
                                               user_history,
                                               name='user_mask_padding')
            item_mask = tf.nn.embedding_lookup(tables['mask_lookup_table'],
                                               item_history,
                                               name='item_mask_padding')
            user_aspects = tf.multiply(user_mask, user_aspects,
                                       'u_hist_a_embs_masked')
            item_aspects = tf.multiply(item_mask, item_aspects,
                                       'v_hist_a_embs_masked')

            with tf.name_scope('aspect_interact'):
                user_unit = tf.nn.l2_normalize(user_aspects, dim=-1)
                item_unit = tf.nn.l2_normalize(item_aspects, dim=-1)

                # Broadcast to every (user aspect, item aspect) pair.
                user_grid = tf.expand_dims(user_unit, 2)
                item_grid = tf.expand_dims(item_unit, 1)
                interact = tf.multiply(user_grid, item_grid)

            with tf.name_scope('aspect_level_attention'):
                pair_scores = _attention_logits(interact, 'att_l2_1')
                att_l2 = tf.nn.softmax(pair_scores, dim=2)

            with tf.name_scope("user_level_attention"):
                item_summary = tf.reduce_sum(item_unit,
                                             axis=1,
                                             keep_dims=True)
                item_tiled = tf.tile(item_summary, [1, self.MaxPerUser, 1])
                user_level_input = item_tiled * user_unit
                user_scores = _attention_logits(user_level_input, 'att_l1_1')
                att_l1 = tf.nn.softmax(user_scores, dim=1)

        with tf.name_scope('attach_attention'):
            per_user_aspect = tf.reduce_sum(tf.multiply(att_l2, interact),
                                            axis=2)
            aspect_interact = tf.reduce_sum(
                tf.multiply(att_l1, per_user_aspect), axis=1)
            aspect_interact = tf.nn.dropout(aspect_interact, self.dropout_keep)

        with tf.name_scope('concatenate'):
            interact_vector = tf.concat([mf_interact, aspect_interact],
                                        axis=-1)

        with tf.name_scope('prediction'):
            rating_preds = tf.matmul(interact_vector,
                                     tables['W_out'],
                                     name='prediction')

        return rating_preds
コード例 #52
0
 def get_instance(args):
     """
     Build a uniform-unit-scaling initializer from a config mapping.

     `args` must support `.get`; its optional 'factor' entry (default 1.0)
     is converted to float and used as the scaling factor. The seed comes
     from the module-level `SEED`.
     """
     scale = float(args.get('factor', 1.0))
     initializer = tf.uniform_unit_scaling_initializer(scale, seed=SEED)
     return initializer
コード例 #53
0
ファイル: qacnn.py プロジェクト: zr8091/InsuranceQA_zh
 def add_hl(self, q_embed, aplus_embed, aminus_embed):
     """Apply one shared tanh hidden layer to the three embedded inputs.

     The same 'weights'/'biases' pair (variable scope 'HL') projects the
     question, positive-answer and negative-answer embeddings. An L2 penalty
     on the weights, scaled by 0.5 * config.l2_reg_lambda, is added to the
     'total_loss' collection.

     Returns:
         Tuple (h_q, h_ap, h_am), each reshaped to
         [-1, config.sequence_length, config.hidden_size].
     """
     cfg = self.config
     with tf.variable_scope('HL'):
         weights = tf.get_variable(
             'weights',
             shape=[cfg.embedding_size, cfg.hidden_size],
             initializer=tf.uniform_unit_scaling_initializer())
         bias = tf.get_variable(
             'biases',
             initializer=tf.constant(0.1, shape=[cfg.hidden_size]))

         def _hidden(embed):
             # Flatten to 2-D for the matmul, then restore the sequence axis.
             flat = tf.reshape(embed, [-1, cfg.embedding_size])
             activated = tf.nn.tanh(tf.matmul(flat, weights) + bias)
             return tf.reshape(
                 activated, [-1, cfg.sequence_length, cfg.hidden_size])

         h_q = _hidden(q_embed)
         h_ap = _hidden(aplus_embed)
         h_am = _hidden(aminus_embed)

         l2_penalty = 0.5 * cfg.l2_reg_lambda * tf.nn.l2_loss(weights)
         tf.add_to_collection('total_loss', l2_penalty)
         return h_q, h_ap, h_am
コード例 #54
0
ファイル: DenoisingAE.py プロジェクト: vitor-bnunes/NetWalk
def autoencoder(data,
                corrupt_prob,
                dimensions,
                beta=0.01,
                rho=0.4,
                activation=tf.nn.sigmoid,
                lamb=0.01,
                gamma=0.01):
    """Build a sparse denoising autoencoder graph and its loss terms.

    Each layer left-multiplies its input by a weight matrix of shape
    [n_output, n_input], where n_input is read from axis 0 of the layer
    input. The decoder uses its own weights (not tied to the encoder).

    Args:
        data: Input cast to float32; axis 0 is used as the feature axis
            (presumably [features, samples] -- confirm against the caller).
        corrupt_prob: Blend factor between `corrupt(x)` and the clean input.
        dimensions: Layer sizes; `dimensions[1:]` are the encoder outputs and
            `dimensions[:-1]` reversed are the decoder outputs.
        beta: Weight of the KL sparsity term.
        rho: Target mean activation used in the KL term.
        activation: Non-linearity applied after every layer.
        lamb: Weight-decay coefficient.
        gamma: Coefficient of the reconstruction error.

    Returns:
        Dict with keys 'x', 'encoder_out', 'reconstruction', 'corrupt_prob',
        'cost', 'noise_input', 'kl', 'weight_decay_J', 'ae_loss', 'kl_loss',
        'W_list' (encoder weights) and 'b_list' (encoder biases).
    """
    weight_init = tf.uniform_unit_scaling_initializer(factor=1.0,
                                                      seed=24,
                                                      dtype=tf.float32)

    def _dense(var_name, n_out, n_in, layer_in):
        # Weights first, then bias, then the forward ops: this keeps the
        # auto-generated variable/op names in their usual creation order.
        weights = tf.get_variable(var_name,
                                  shape=[n_out, n_in],
                                  initializer=weight_init)
        bias = tf.Variable(tf.zeros([1, n_out]))
        # The double transpose lets the [1, n_out] bias broadcast over
        # columns.
        activated = activation(
            tf.transpose(tf.transpose(tf.matmul(weights, layer_in)) + bias))
        return weights, bias, activated

    x = tf.cast(data, tf.float32)

    noise_input = corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)
    current_input = noise_input

    weight_decay_J = 0

    # Encoder stack.
    print("========= encoder begin ==========")
    encoder = []
    encoder_b = []
    for layer_i, n_output in enumerate(dimensions[1:]):
        n_input = int(current_input.get_shape()[0])
        print("encoder : layer_i - n_output - n_input", layer_i, n_output,
              n_input)
        weights, bias, current_input = _dense("W1_" + str(layer_i), n_output,
                                              n_input, current_input)
        encoder.append(weights)
        encoder_b.append(bias)
        weight_decay_J += (lamb / 2.0) * (tf.reduce_mean(weights**2))
    print("========= encoder finish =========")

    # Latent representation.
    encoder_out = current_input
    print(encoder_out.shape)

    # Decoder stack with independent weights.
    print("========= decoder begin ==========")
    for layer_i, n_output in enumerate(reversed(dimensions[:-1])):
        print("decoder : layer_i - n_output", layer_i, n_output)
        n_input = int(current_input.get_shape()[0])
        weights, _, current_input = _dense("W2_" + str(layer_i), n_output,
                                           n_input, current_input)
        weight_decay_J += (lamb / 2.0) * (tf.reduce_mean(weights**2))
    print("========= decoder finish =========")

    reconstruction = current_input

    # Mean activation per hidden unit, compared against the target `rho`.
    rhohats = tf.reduce_mean(tf.transpose(encoder_out), 0)
    active_term = rho * tf.log(rho / rhohats)
    inactive_term = (1 - rho) * tf.log((1 - rho) / (1 - rhohats))
    kl = tf.reduce_mean(active_term + inactive_term)

    ae_loss = (gamma / 2.0) * tf.reduce_mean(tf.square(reconstruction - x))
    kl_loss = beta * kl
    cost = ae_loss + kl_loss + weight_decay_J

    return {
        'x': x,
        'encoder_out': encoder_out,
        'reconstruction': reconstruction,
        'corrupt_prob': corrupt_prob,
        'cost': cost,
        'noise_input': noise_input,
        'kl': kl,
        'weight_decay_J': weight_decay_J,
        'ae_loss': ae_loss,
        'kl_loss': kl_loss,
        'W_list': encoder,
        'b_list': encoder_b
    }
コード例 #55
0
            return FHNOutputTuple(V, W), FHNStateTuple(V, W)


# Initial cell state is fed from outside: one float32 placeholder of shape
# [batch_size, net_size] per state component (V and W).
state = FHNStateTuple(
    tf.placeholder(tf.float32, [batch_size, net_size], name="V"),
    tf.placeholder(tf.float32, [batch_size, net_size], name="W"),
)

cell = FHNCell(net_size, basic_v_relation)

# NOTE(review): `input` and `filter` shadow Python builtins at module level.
# The sequence is fed as a 4-D tensor with two trailing singleton dims so it
# can go through conv2d.
input = tf.placeholder(tf.float32,
                       shape=(batch_size, seq_size, 1, 1),
                       name="Input")
# Convolution kernel "E": spans L steps along dim 1 and produces
# `filters_num` output channels.
filter = vs.get_variable(
    "E", [L, 1, 1, filters_num],
    initializer=tf.uniform_unit_scaling_initializer(factor=weight_init_factor))

# Strided convolution along dim 1 (the sequence axis of `input`).
conv_out = tf.nn.conv2d(input,
                        filter,
                        strides=[1, strides, 1, 1],
                        padding='SAME')
# Swap the first two axes so time comes first, then drop the singleton dim 2;
# dynamic_rnn below is run with time_major=True.
conv_out = tf.transpose(conv_out, [1, 0, 2, 3])
conv_out = tf.squeeze(conv_out, squeeze_dims=[2])

net_out, finstate = rnn.dynamic_rnn(cell,
                                    conv_out,
                                    initial_state=state,
                                    time_major=True)

# Append a trailing axis to the V output, then reorder the axes: the first
# two are swapped back and the last two are swapped.
V = tf.expand_dims(net_out.V, 3)
V = tf.transpose(V, [1, 0, 3, 2])
コード例 #56
0
ファイル: rnn_logic.py プロジェクト: windweller/Sempar
    def __init__(self,
                 src_vocab_size,
                 tgt_vocab_size,
                 env_vocab_size,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 dropout,
                 FLAGS,
                 forward_only=False,
                 optimizer="adam"):
        """Build the model graph: placeholders, encoder, decoder, loss, saver.

        Args:
            src_vocab_size: Stored as `self.src_vocab_size`.
            tgt_vocab_size: Stored as `self.tgt_vocab_size`.
            env_vocab_size: Stored as `self.env_vocab_size`.
            size: Stored as `self.size`; also the width of each decoder-state
                placeholder.
            num_layers: Number of decoder-state placeholders created.
            max_gradient_norm: Global-norm threshold for gradient clipping.
            batch_size: Stored as `self.batch_size`.
            learning_rate: Initial value of the non-trainable learning-rate
                variable.
            learning_rate_decay_factor: Multiplier applied by
                `self.learning_rate_decay_op`.
            dropout: Drop rate; `self.keep_prob_config` is `1.0 - dropout`.
            FLAGS: Configuration object. This method reads `seed`, `co_attn`,
                `seq`, `cat_attn` and `keep` from it.
            forward_only: If true, no gradient or update ops are created.
            optimizer: Name passed to `get_optimizer`.
        """
        self.size = size
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.env_vocab_size = env_vocab_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.keep_prob_config = 1.0 - dropout
        # The learning rate is a graph variable so it can be decayed in place
        # by running `learning_rate_decay_op`.
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # Input placeholders; both dimensions of every token/mask tensor are
        # left unspecified.
        self.keep_prob = tf.placeholder(tf.float32)
        self.source_tokens = tf.placeholder(tf.int32,
                                            shape=[None, None],
                                            name="source_tokens")
        self.target_tokens = tf.placeholder(tf.int32,
                                            shape=[None, None],
                                            name="target_tokens")
        self.source_mask = tf.placeholder(tf.int32,
                                          shape=[None, None],
                                          name="source_mask")
        self.target_mask = tf.placeholder(tf.int32,
                                          shape=[None, None],
                                          name="target_mask")

        self.ctx_tokens = tf.placeholder(tf.int32,
                                         shape=[None, None],
                                         name="ctx_tokens")
        # self.pred_tokens = tf.placeholder(tf.int32, shape=[None, None], name="pred_tokens")
        self.ctx_mask = tf.placeholder(tf.int32,
                                       shape=[None, None],
                                       name="ctx_mask")
        # self.pred_mask = tf.placeholder(tf.int32, shape=[None, None], name="pred_mask")

        self.beam_size = tf.placeholder(tf.int32)
        # Sum of the target mask over axis 0 (presumably the time axis, giving
        # one length per example -- confirm the mask layout).
        self.target_length = tf.reduce_sum(self.target_mask,
                                           reduction_indices=0)

        self.FLAGS = FLAGS

        # One state placeholder per decoder layer. `decoder_state_output`
        # starts empty and is not filled in this method.
        self.decoder_state_input, self.decoder_state_output = [], []
        for i in xrange(num_layers):
            self.decoder_state_input.append(
                tf.placeholder(tf.float32, shape=[None, size]))

        # adding seed, now we fixed the randomness
        with tf.variable_scope("Logic",
                               initializer=tf.uniform_unit_scaling_initializer(
                                   1.0, seed=self.FLAGS.seed)):
            self.setup_embeddings()
            self.setup_encoder()
            # this should be fine...
            # Pick the encoder variant; the first matching flag wins.
            if FLAGS.co_attn:
                self.encoder_output = self.rev_coattn_encode()
            elif FLAGS.seq:
                self.encoder_output = self.sequence_encode()
            elif FLAGS.cat_attn:
                self.encoder_output = self.concate_encode()
            else:
                self.encoder_output = self.rev_attention_encode(
                )  # ha, attention is the "normal" case
            self.setup_decoder(self.encoder_output)
            self.setup_loss()

            self.setup_beam()

        params = tf.trainable_variables()
        if not forward_only:
            opt = get_optimizer(optimizer)(self.learning_rate)

            # `self.losses` is not assigned in this method; presumably
            # `setup_loss` sets it -- confirm.
            gradients = tf.gradients(self.losses, params)
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            #   self.gradient_norm = tf.global_norm(clipped_gradients)
            # The reported gradient norm is that of the unclipped gradients;
            # the update itself applies the clipped ones.
            self.gradient_norm = tf.global_norm(gradients)
            self.param_norm = tf.global_norm(params)
            self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                               global_step=self.global_step)

        self.saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=FLAGS.keep)  # write_version=tf.train.SaverDef.V1
コード例 #57
0
def encode_spectrum(encoder_inputs, intensity_inputs_forward,
                    intensity_inputs_backward, decoder_inputs_forward,
                    decoder_inputs_backward, keep_conv, keep_dense):
    """Encode the spectrum with a small CNN and pass it to `embed_labels`.

    `encoder_inputs[0]` is reshaped to [-1, 1, MZ_SIZE, 1], run through two
    1x4 convolutions with ReLU, max-pooled with stride 4 along the m/z axis,
    and projected by a dense ReLU layer to `deepnovo_config.embedding_size`.
    Dropout is applied after the pooling (`keep_conv`) and after the dense
    layer (`keep_dense`).

    Returns:
        Whatever `embed_labels` returns when called with the encoded
        spectrum followed by the remaining arguments of this function.
    """

    def _weight_and_bias(prefix, weight_shape):
        # Creates "<prefix>_W" then "<prefix>_B"; the bias has one entry per
        # output channel (last dimension of the weight).
        weight = variable_scope.get_variable(
            name=prefix + "_W",
            shape=weight_shape,
            initializer=tf.uniform_unit_scaling_initializer(1.43))
        bias = variable_scope.get_variable(
            name=prefix + "_B",
            shape=[weight_shape[-1]],
            initializer=tf.constant_initializer(0.1))
        return weight, bias

    unit_strides = [1, 1, 1, 1]

    with variable_scope.variable_scope("embedding_rnn_seq2seq"):

        # Spectrum as a 1 x MZ_SIZE single-channel image.
        spectrum_image = tf.reshape(encoder_inputs[0],
                                    [-1, 1, deepnovo_config.MZ_SIZE, 1])

        conv1_W, conv1_B = _weight_and_bias("conv1", [1, 4, 1, 4])
        conv2_W, conv2_B = _weight_and_bias("conv2", [1, 4, 4, 4])

        # Flattened size after the stride-4 pooling with 4 channels.
        dense1_input_size = 1 * (deepnovo_config.MZ_SIZE // (4)) * 4
        dense1_output_size = deepnovo_config.embedding_size
        dense1_W, dense1_B = _weight_and_bias(
            "dense1", [dense1_input_size, dense1_output_size])

        conv1 = tf.nn.relu(
            tf.nn.conv2d(
                spectrum_image, conv1_W, strides=unit_strides, padding='SAME')
            + conv1_B)

        conv2 = tf.nn.relu(
            tf.nn.conv2d(conv1, conv2_W, strides=unit_strides, padding='SAME')
            + conv2_B)
        pooled = tf.nn.max_pool(conv2,
                                ksize=[1, 1, 6, 1],
                                strides=[1, 1, 4, 1],
                                padding='SAME')
        pooled = tf.nn.dropout(pooled, keep_conv)

        flattened = tf.reshape(pooled, [-1, dense1_input_size])
        dense1 = tf.nn.relu(tf.matmul(flattened, dense1_W) + dense1_B)
        dense1 = tf.nn.dropout(dense1, keep_dense)

        print('dense1 in encode_spectrum:', dense1)

        # The encoded spectrum is the first input of the label embedding.
        return embed_labels(dense1, intensity_inputs_forward,
                            intensity_inputs_backward, decoder_inputs_forward,
                            decoder_inputs_backward, keep_conv, keep_dense)
コード例 #58
0
def spectrum_cnn2(spectrum, keep_conv=.75, keep_dense=.5):
    """Map a spectrum to a single output value with a small CNN.

    The spectrum is reshaped to [-1, 1, MZ_SIZE, 1], passed through two 1x4
    convolutions with ReLU, max-pooled with stride 4, flattened, projected to
    512 units with ReLU, and then through two linear fully connected layers
    of 100 and 1 units.

    Args:
        spectrum: Tensor reshapeable to [-1, 1, data_utils.MZ_SIZE, 1].
        keep_conv: Keep probability of the dropout after the pooling layer.
            Defaults to the previously hard-coded 0.75; pass 1.0 (or a
            placeholder) to disable dropout at inference time.
        keep_dense: Keep probability of the dropout after the 512-unit dense
            layer. Defaults to the previously hard-coded 0.5.

    Returns:
        The output of the final 1-unit fully connected layer.
    """
    with variable_scope.variable_scope("spectrum_cnn2"):

        input_layer = tf.reshape(spectrum, [-1, 1, data_utils.MZ_SIZE, 1])

        W1 = tf.get_variable(
            "W1", [1, 4, 1, 4],
            initializer=tf.contrib.layers.xavier_initializer(seed=0))
        B1 = tf.get_variable("B1", [4],
                             initializer=tf.constant_initializer(0.1))

        W2 = tf.get_variable(
            "W2", [1, 4, 4, 4],
            initializer=tf.contrib.layers.xavier_initializer(seed=0))
        B2 = tf.get_variable("B2", [4],
                             initializer=tf.constant_initializer(0.1))

        Z1 = tf.nn.conv2d(input_layer,
                          W1,
                          strides=[1, 1, 1, 1],
                          padding='SAME')
        A1 = tf.nn.relu(Z1 + B1)

        Z2 = tf.nn.conv2d(A1, W2, strides=[1, 1, 1, 1], padding='SAME')
        A2 = tf.nn.relu(Z2 + B2)

        P2 = tf.nn.max_pool(A2,
                            ksize=[1, 1, 6, 1],
                            strides=[1, 1, 4, 1],
                            padding="SAME")

        # Keep probability is a parameter so callers can turn dropout off.
        D2 = tf.nn.dropout(P2, keep_conv)

        # Flattened size after the stride-4 pooling with 4 channels.
        dense1_input_size = 1 * (data_utils.MZ_SIZE // (4)) * 4
        dense1_output_size = 512

        dense1_W = variable_scope.get_variable(
            name="dense1_W",
            shape=[dense1_input_size, dense1_output_size],
            initializer=tf.uniform_unit_scaling_initializer(1.43))
        dense1_B = variable_scope.get_variable(
            name="dense1_B",
            shape=[dense1_output_size],
            initializer=tf.constant_initializer(0.1))

        Z3 = tf.reshape(D2, [-1, dense1_input_size])
        Z3 = tf.nn.relu(tf.matmul(Z3, dense1_W) + dense1_B)
        Z3 = tf.nn.dropout(Z3, keep_dense)

        # Two linear heads: 100 units, then the single output unit.
        Z5 = tf.contrib.layers.fully_connected(Z3,
                                               num_outputs=100,
                                               activation_fn=None)
        print(Z5.shape)
        Z6 = tf.contrib.layers.fully_connected(
            Z5,
            num_outputs=1,  #data_utils.vocab_size,
            activation_fn=None)
        print(Z6.shape)
    return Z6
コード例 #59
0
def uniform_unit_scaling(params):
    """Return a `tf.uniform_unit_scaling_initializer` with default settings.

    `params` is accepted but not consulted.
    """
    initializer = tf.uniform_unit_scaling_initializer()
    return initializer
コード例 #60
0
    def initialize_attention_func(self, input_size, attention_states):
        """Create the attention variables and the attention closure.

        Stores on the instance:
            self._attention_func: closure mapping an RNN state to a context
                vector computed over `attention_states`.
            self._W_3, self._b_3: variables of shape
                [input_size + attention_size, input_size] and [input_size].
                They are only created here, not used.

        Args:
            input_size: Size used for the W_3/b_3 shapes.
            attention_states: Encoder outputs. Axis 1 is read as the
                attention length and the last axis as the attention size,
                and both must be statically known because they are used in
                reshapes.
        """
        # Get shape of attention states (the outputs from the encoder cell)
        attention_states_shape = attention_states.get_shape().as_list()
        attention_size = attention_states_shape[-1]
        attention_length = attention_states_shape[1]

        # Define W_2
        with tf.variable_scope('attention'):
            # Since we unroll the cell state tuples we will have two vectors
            # for each rnn cell (the hidden state vector c_t and the output
            # vector h_t)
            unrolled_state_length = 2 * self.state_size * self.num_cells

            W_2 = tf.get_variable(
                name='W_2',
                shape=[unrolled_state_length, attention_size],
                initializer=tf.uniform_unit_scaling_initializer(),
                dtype=tf.float32)

            b_2 = tf.get_variable(name='b_2',
                                  shape=[attention_size],
                                  initializer=tf.constant_initializer(),
                                  dtype=tf.float32)

            W_3 = tf.get_variable(
                name='W_3',
                shape=[input_size + attention_size, input_size],
                initializer=tf.uniform_unit_scaling_initializer(),
                dtype=tf.float32)

            b_3 = tf.get_variable(name='b_3',
                                  shape=[input_size],
                                  initializer=tf.constant_initializer(),
                                  dtype=tf.float32)

        # Reshape hidden encoder state `h_t`.
        # The extra singleton axis makes it a 4-D input for conv2d below.
        h_t = tf.reshape(attention_states,
                         shape=[-1, attention_length, 1, attention_size])

        # NOTE(review): `k` and `v` are created outside the 'attention' scope
        # and without an explicit initializer, unlike W_2/W_3 above.
        k = tf.get_variable(shape=[1, 1, attention_size, attention_size],
                            name='attention_W')

        v = tf.get_variable(shape=[attention_size], name='attention_v')

        # Compute W_1 * h_t using a 1-by-1 convolution
        # It is computed once here and captured by the closure.
        W1_ht = tf.nn.conv2d(input=h_t,
                             filter=k,
                             strides=[1, 1, 1, 1],
                             padding='SAME',
                             name='W1_ht')

        # Define attention function
        def attention_func(state):
            '''
                Computes attention-weighted context vector c_t from a given
                RNN StateTuple.
            '''
            # If the query is a tuple, flatten it
            # (e.g. when using bidirectional encoder).
            if is_sequence(state):
                query_list = flatten(state)
                state = tf.concat(query_list, axis=1)

            with tf.variable_scope('attention'):
                # Compute W_2 * d_t
                # `projection` is defined elsewhere; presumably
                # state * W_2 + b_2 -- confirm.
                W2_dt = projection(state, W=W_2, b=b_2)
                # Reshaped so it broadcasts against W1_ht.
                W2_dt = tf.reshape(W2_dt, [-1, 1, 1, attention_size])

                # Compute attention mask:
                #   v.T * tanh(W_1 * h_t + W_2 * d_t)
                u = tf.reduce_sum(v * tf.tanh(W1_ht + W2_dt), [2, 3])

                # Compute attention mask - alphas
                alpha = tf.nn.softmax(u, name='alpha-weights')

                # Compute the attention-weighted context vector c_t.
                # Sums over the attention positions (axis 1) and the
                # singleton axis 2.
                c_t = tf.reduce_sum(
                    tf.reshape(alpha, [-1, attention_length, 1, 1]) * h_t,
                    [1, 2])

            return c_t

        self._attention_func = attention_func
        self._W_3 = W_3
        self._b_3 = b_3