Example #1
0
    def _testSpecialCases(self, use_gpu):
        """Exercise split_v corner cases: one full-size piece and a -1 size.

        A single split of size 4 must return the input unchanged; a -1 entry
        in size_splits is inferred from the leftover rows (4 - 3 = 1 here).
        """
        data = np.random.rand(4, 4).astype("f")

        with self.test_session(use_gpu=use_gpu) as sess:
            # One piece covering the entire axis -> identical to the input.
            whole = sess.run(tf.split_v(data, [4], 0))
            self.assertAllEqual(whole[0], data)

            # -1 is replaced by the remaining size along axis 0.
            pieces = sess.run(tf.split_v(data, [-1, 3], 0))
            self.assertAllEqual(pieces[0], data[0:1, :])
            self.assertAllEqual(pieces[1], data[1:4, :])
Example #2
0
  def _testSpecialCases(self, use_gpu):
    """Verify split_v with a single full split and with an inferred (-1) size."""
    matrix = np.random.rand(4, 4).astype("f")

    with self.test_session(use_gpu=use_gpu) as sess:
      # Splitting into exactly one piece should reproduce the input.
      out = sess.run(tf.split_v(matrix, [4], 0))
      self.assertAllEqual(out[0], matrix)

      # The -1 size is deduced as the remainder: 4 - 3 = 1.
      out = sess.run(tf.split_v(matrix, [-1, 3], 0))
      self.assertAllEqual(out[0], matrix[0:1, :])
      self.assertAllEqual(out[1], matrix[1:4, :])
Example #3
0
  def testExplicitNum(self):
    """split_v needs an explicit num when size_splits has unknown static shape."""
    size_splits = tf.placeholder(dtype=tf.int32, shape=[None])

    value = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    with self.test_session(use_gpu=False) as sess:
      # Without num=, the op cannot tell how many outputs to create from a
      # [None]-shaped placeholder and must raise at graph-build time.
      with self.assertRaises(ValueError) as context:
        sess.run(tf.split_v(value, size_splits), {size_splits: [2, 2, 6]})

      self.assertTrue("Cannot infer num from shape" in str(context.exception))

      # Passing num=3 resolves the ambiguity.
      result = sess.run(tf.split_v(value, size_splits, num=3),
                        {size_splits: [2, 2, 6]})

    expected = [value[0:2], value[2:4], value[4:]]
    for piece, want in zip(result, expected):
      self.assertAllEqual(piece, want)
Example #4
0
    def testExplicitNum(self):
        """An unknown-shape size_splits requires num=; with it, splitting works."""
        size_splits = tf.placeholder(dtype=tf.int32, shape=[None])

        value = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        with self.test_session(use_gpu=False) as sess:
            # The output count cannot be derived from a [None]-shaped
            # placeholder, so split_v without num= must fail.
            with self.assertRaises(ValueError) as context:
                sess.run(tf.split_v(value, size_splits),
                         {size_splits: [2, 2, 6]})

            self.assertTrue(
                "Cannot infer num from shape" in str(context.exception))

            # Supplying num=3 makes the number of outputs explicit.
            result = sess.run(tf.split_v(value, size_splits, num=3),
                              {size_splits: [2, 2, 6]})

        bounds = [(0, 2), (2, 4), (4, 10)]
        for piece, (lo, hi) in zip(result, bounds):
            self.assertAllEqual(piece, value[lo:hi])
Example #5
0
  def testListOfScalarTensors(self):
    """split_v accepts a plain Python list of scalar tensors as size_splits."""
    first = tf.to_int32(5)
    second = tf.to_int32(6)

    value = np.random.rand(11, 11)

    with self.test_session(use_gpu=False) as sess:
      top, bottom = sess.run(tf.split_v(value, [first, second]))

    # Rows 0..4 go to the first piece, rows 5..10 to the second.
    self.assertAllEqual(top, value[0:5, :])
    self.assertAllEqual(bottom, value[5:, :])
Example #6
0
    def testListOfScalarTensors(self):
        """Sizes given as scalar tensors (5 and 6) split an 11-row input."""
        sizes = [tf.to_int32(5), tf.to_int32(6)]

        value = np.random.rand(11, 11)

        with self.test_session(use_gpu=False) as sess:
            result = sess.run(tf.split_v(value, sizes))

        # First 5 rows, then the remaining 6 rows.
        self.assertAllEqual(result[0], value[0:5, :])
        self.assertAllEqual(result[1], value[5:, :])
Example #7
0
 def _testHugeNumberOfTensors(self, use_gpu):
   """Stress split_v with 10000 random-sized pieces along axis 1.

   Verifies each returned piece equals the corresponding slice of the input.
   """
   num_split = 10000
   size_splits = np.random.randint(1, 3, num_split)
   shape = [3, np.sum(size_splits)]
   split_dim = 1
   inp = np.random.rand(*shape).astype("f")
   with self.test_session(use_gpu=use_gpu) as sess:
     result = sess.run(tf.split_v(inp, size_splits, split_dim))
   slices = [slice(0, x) for x in shape]
   offset = 0
   for i in range(num_split):
     slices[split_dim] = slice(offset, offset + size_splits[i])
     offset += size_splits[i]
     # Fix: index with a tuple — NumPy no longer accepts a list of slices
     # as a multidimensional index.
     self.assertAllEqual(result[i], inp[tuple(slices)])
Example #8
0
  def _testGradientsSimple(self, use_gpu):
    """Gradients of split_v must flow back into the matching input columns.

    Feeds a (4, 1) and a (4, 3) gradient into the two split outputs and
    checks they come back as columns 0:1 and 1:4 of the input gradient.
    """
    inp = np.random.rand(4, 4).astype("f")
    with self.test_session(use_gpu=use_gpu):
      inp_tensor = tf.convert_to_tensor(inp)
      # Fix: size_splits must sum to the size of axis 1 (4), so use [1, 3];
      # the original [1, 4] summed to 5 and cannot match the (4, 1) and
      # (4, 3) gradient shapes asserted below.
      s = tf.split_v(inp_tensor, [1, 3], 1)
      inp_grads = [
          np.random.rand(4, 1).astype("f"), np.random.rand(4, 3).astype("f")
      ]
      grad_tensors = [tf.constant(x) for x in inp_grads]
      grad = tf.gradients(s, [inp_tensor], grad_tensors)[-1]
      result = grad.eval()

    self.assertAllEqual(result[:, 0:1], inp_grads[0])
    self.assertAllEqual(result[:, 1:4], inp_grads[1])
Example #9
0
  def _testGradientsSimple(self, use_gpu):
    """Gradients of split_v must flow back into the matching input columns.

    Feeds a (4, 1) and a (4, 3) gradient into the two split outputs and
    checks they come back as columns 0:1 and 1:4 of the input gradient.
    """
    inp = np.random.rand(4, 4).astype("f")
    with self.test_session(use_gpu=use_gpu):
      inp_tensor = tf.convert_to_tensor(inp)
      # Fix: size_splits must sum to the size of axis 1 (4), so use [1, 3];
      # the original [1, 4] summed to 5 and cannot match the (4, 1) and
      # (4, 3) gradient shapes asserted below.
      s = tf.split_v(inp_tensor, [1, 3], 1)
      inp_grads = [
          np.random.rand(4, 1).astype("f"), np.random.rand(4, 3).astype("f")
      ]
      grad_tensors = [tf.constant(x) for x in inp_grads]
      grad = tf.gradients(s, [inp_tensor], grad_tensors)[-1]
      result = grad.eval()

    self.assertAllEqual(result[:, 0:1], inp_grads[0])
    self.assertAllEqual(result[:, 1:4], inp_grads[1])
Example #10
0
 def _testHugeNumberOfTensors(self, use_gpu):
     """Stress split_v with 10000 random-sized pieces along axis 1.

     Verifies each returned piece equals the corresponding slice of the input.
     """
     num_split = 10000
     size_splits = np.random.randint(1, 3, num_split)
     shape = [3, np.sum(size_splits)]
     split_dim = 1
     inp = np.random.rand(*shape).astype("f")
     with self.test_session(use_gpu=use_gpu) as sess:
         result = sess.run(tf.split_v(inp, size_splits, split_dim))
     slices = [slice(0, x) for x in shape]
     offset = 0
     for i in range(num_split):
         slices[split_dim] = slice(offset, offset + size_splits[i])
         offset += size_splits[i]
         # Fix: index with a tuple — NumPy no longer accepts a list of
         # slices as a multidimensional index.
         self.assertAllEqual(result[i], inp[tuple(slices)])
Example #11
0
        def unigaussian_loss(y_true, y_pred):
            """Mixture-density negative log-likelihood loss.

            y_pred packs, along axis 1: num_mix * output_dim means, num_mix
            sigmas and num_mix mixture weights (pi). The loss computed below
            is -log(sum_i pi_i * N(y_true; mu_i, sigma_i)), reduced with
            tf.reduce_mean over all remaining elements.
            """
            mix = tf.range(start=0, limit=self.num_mix)
            # Old keyword-style split_v API: carve y_pred into mu/sigma/pi.
            out_mu, out_sigma, out_pi = tf.split_v(
                split_dim=1,
                size_splits=[
                    self.num_mix * self.output_dim, self.num_mix, self.num_mix
                ],
                value=y_pred,
                name='mdn_coef_split')

            # tf.to_float(out_mu)
            # print('----- ', tf.shape(y_pred)[0].eval(session=K.get_session()))
            # print('----- ', tf.shape(y_pred)[1])

            def loss_i(i):
                # Weighted likelihood of mixture component i: slice out this
                # component's sigma, pi and mu from the packed tensors.
                batch_size = tf.shape(out_sigma)[0]
                sigma_i = tf.slice(out_sigma, [0, i], [batch_size, 1],
                                   name='mdn_sigma_slice')
                pi_i = tf.slice(out_pi, [0, i], [batch_size, 1],
                                name='mdn_pi_slice')
                mu_i = tf.slice(out_mu, [0, i * self.output_dim],
                                [batch_size, self.output_dim],
                                name='mdn_mu_slice')

                print('***.....>> ', i * self.output_dim)
                # NOTE(review): tf.Print's return value is discarded, so this
                # node is never attached to the graph and prints nothing.
                tf.Print(mu_i, [i], ">>>>>>>  ")
                # print('.....>> ', tf.shape(y_true))

                dist = tf.contrib.distributions.Normal(mu=mu_i, sigma=sigma_i)
                loss = dist.pdf(y_true)

                # loss = gaussian_kernel_(y_true, mu_i, sigma_i)

                # Weight this component's density by its mixture coefficient.
                loss = pi_i * loss

                return loss

            # Evaluate every mixture component and stack the results.
            result = tf.map_fn(lambda m: loss_i(m),
                               mix,
                               dtype=tf.float32,
                               name='mix_map_fn')

            # Sum over components, then take the negative log-likelihood.
            result = tf.reduce_sum(result, axis=0, keep_dims=False)
            result = -tf.log(result)
            # result = tf.reduce_mean(result, axis=1)
            result = tf.reduce_mean(result)
            # result = tf.reduce_sum(result)
            return result
Example #12
0
 def _RunAndVerifyScalar(self, use_gpu, large_num_splits=False):
   """Split a random rank-5 tensor into num_split equal pieces and verify.

   Args:
     use_gpu: whether the session should run on GPU.
     large_num_splits: if True, use 16-24 pieces instead of 2-7.
   """
   shape = np.random.randint(0, 5, size=5)
   split_dim = np.random.randint(0, 5)
   if large_num_splits:
     num_split = np.random.randint(16, 25)
   else:
     num_split = np.random.randint(2, 8)
   # Make the split axis an exact multiple of num_split.
   shape[split_dim] = np.random.randint(2, 5) * num_split
   inp = np.random.rand(*shape).astype("f")
   with self.test_session(use_gpu=use_gpu) as sess:
     result = sess.run(tf.split_v(inp, num_split, split_dim))
   slices = [slice(0, x) for x in shape]
   offset = 0
   length = shape[split_dim] // num_split
   for i in range(num_split):
     slices[split_dim] = slice(offset, offset + length)
     offset += length
     # Fix: index with a tuple — NumPy no longer accepts a list of slices
     # as a multidimensional index.
     self.assertAllEqual(result[i], inp[tuple(slices)])
Example #13
0
 def _RunAndVerifyScalar(self, use_gpu, large_num_splits=False):
     """Split a random rank-5 tensor into num_split equal pieces and verify.

     Args:
       use_gpu: whether the session should run on GPU.
       large_num_splits: if True, use 16-24 pieces instead of 2-7.
     """
     shape = np.random.randint(0, 5, size=5)
     split_dim = np.random.randint(0, 5)
     if large_num_splits:
         num_split = np.random.randint(16, 25)
     else:
         num_split = np.random.randint(2, 8)
     # Make the split axis an exact multiple of num_split.
     shape[split_dim] = np.random.randint(2, 5) * num_split
     inp = np.random.rand(*shape).astype("f")
     with self.test_session(use_gpu=use_gpu) as sess:
         result = sess.run(tf.split_v(inp, num_split, split_dim))
     slices = [slice(0, x) for x in shape]
     offset = 0
     length = shape[split_dim] // num_split
     for i in range(num_split):
         slices[split_dim] = slice(offset, offset + length)
         offset += length
         # Fix: index with a tuple — NumPy no longer accepts a list of
         # slices as a multidimensional index.
         self.assertAllEqual(result[i], inp[tuple(slices)])
Example #14
0
 def _RunAndVerify(self, use_gpu, large_num_splits=False):
     """Split a random rank-5 tensor by random sizes and verify each piece.

     Args:
       use_gpu: whether the session should run on GPU.
       large_num_splits: if True, use 16-24 pieces instead of 2-7.
     """
     # Random dims of rank 5
     shape = np.random.randint(1, 5, size=5)
     split_dim = np.random.randint(0, 5)
     if large_num_splits:
         num_split = np.random.randint(16, 25)
     else:
         num_split = np.random.randint(2, 8)
     size_splits = np.random.randint(2, 8, num_split)
     # The split axis must equal the sum of the requested piece sizes.
     shape[split_dim] = np.sum(size_splits)
     inp = np.random.rand(*shape).astype("f")
     with self.test_session(use_gpu=use_gpu) as sess:
         result = sess.run(tf.split_v(inp, size_splits, split_dim))
     slices = [slice(0, x) for x in shape]
     offset = 0
     for i in range(num_split):
         slices[split_dim] = slice(offset, offset + size_splits[i])
         offset += size_splits[i]
         # Fix: index with a tuple — NumPy no longer accepts a list of
         # slices as a multidimensional index.
         self.assertAllEqual(result[i], inp[tuple(slices)])
Example #15
0
 def _RunAndVerify(self, use_gpu, large_num_splits=False):
   """Split a random rank-5 tensor by random sizes and verify each piece.

   Args:
     use_gpu: whether the session should run on GPU.
     large_num_splits: if True, use 16-24 pieces instead of 2-7.
   """
   # Random dims of rank 5
   shape = np.random.randint(1, 5, size=5)
   split_dim = np.random.randint(0, 5)
   if large_num_splits:
     num_split = np.random.randint(16, 25)
   else:
     num_split = np.random.randint(2, 8)
   size_splits = np.random.randint(2, 8, num_split)
   # The split axis must equal the sum of the requested piece sizes.
   shape[split_dim] = np.sum(size_splits)
   inp = np.random.rand(*shape).astype("f")
   with self.test_session(use_gpu=use_gpu) as sess:
     result = sess.run(tf.split_v(inp, size_splits, split_dim))
   slices = [slice(0, x) for x in shape]
   offset = 0
   for i in range(num_split):
     slices[split_dim] = slice(offset, offset + size_splits[i])
     offset += size_splits[i]
     # Fix: index with a tuple — NumPy no longer accepts a list of slices
     # as a multidimensional index.
     self.assertAllEqual(result[i], inp[tuple(slices)])
Example #16
0
def build_graph(device, input_shape, output_sizes, axis):
  """Build a graph containing a sequence of split_v operations.

  Creates one zero-filled input tensor and splits it 100 times along `axis`
  into pieces of the given sizes, then groups every output into a single op
  (suitable as a benchmark target).

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    output_sizes: size of each output along axis.
    axis: axis to be split along.

  Returns:
    A grouped op that depends on all split outputs.
  """
  with tf.device("/%s:0" % device):
    inp = tf.zeros(input_shape)

    outputs = []
    for _ in range(100):
      outputs.extend(tf.split_v(inp, output_sizes, axis))
    return tf.group(*outputs)
Example #17
0
                                              activation_fn=None)
    if tf.VERSION == '1.3.0':
        outputs = tf.nn.sigmoid(logit)
    elif tf.VERSION == '0.12.1':  #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/
        outputs = tf.sigmoid(logit)
    return outputs


pred = dynamicRNN(x)
if tf.VERSION == '1.3.0':
    pred_qual, pred_ccssm = tf.split(value=pred,
                                     num_or_size_splits=[612, 4],
                                     axis=1)
elif tf.VERSION == '0.12.1':  #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/
    pred_qual, pred_ccssm = tf.split_v(value=pred,
                                       size_splits=[612, 4],
                                       split_dim=1)

# Define loss and optimizer
if tf.VERSION == '1.3.0':
    cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
elif tf.VERSION == '0.12.1':  #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/
    cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, targets=y))
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(cost)
optimizer = tf.train.AdagradOptimizer(
    learning_rate=learning_rate).minimize(cost)

# Evaluate model - use AUC to evaluate model
if tf.VERSION == '1.3.0':
Example #18
0
    def __init__(self, args):
        """Build a mixture-density-network RNN graph from an `args` dict.

        Constructs a multi-layer LSTM, a dense layer producing MDN
        parameters (eos, event vector, and 6 parameters per 2-D Gaussian),
        the loss terms (eq. 24-26 of http://arxiv.org/abs/1308.0850), and
        the training op. The keys read from `args` are: rnn_size, mode,
        nmixtures, batch_size, seq_len, grad_clip, events, keep_prob,
        num_layers and optimizer.
        """

        #%% model params
        self.rnn_size = args['rnn_size']
        self.train = True if args['mode'] == 'train' else False

        self.nmixtures = args['nmixtures']
        # In sampling mode, batch size and sequence length collapse to 1.
        self.batch_size = args[
            'batch_size'] if self.train else 1  # training/sampling specific
        self.tsteps = args[
            'seq_len'] if self.train else 1  # training/sampling specific

        # training params
        self.grad_clip = args['grad_clip']

        # other
        self.evt_vec_len = len(args['events'])
        self.graves_initializer = tf.truncated_normal_initializer(
            mean=0., stddev=.075, seed=None, dtype=tf.float32)
        self.window_b_initializer = tf.truncated_normal_initializer(
            mean=-3.0, stddev=.25, seed=None, dtype=tf.float32)
        input_shape = 4  #(x, y), eos, mode

        self.input_vec_dim = input_shape
        self.output_vec_dim = 3 + self.evt_vec_len  #(x, y), eos, evt

        #%% build the basic recurrent network architecture
        cell_func = tf.nn.rnn_cell.LSTMCell  # could be GRUCell or RNNCell

        cell = cell_func(args['rnn_size'])
        if (self.train and args['keep_prob'] < 1):  # training mode
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=args['keep_prob'])

        cell_multi = tf.nn.rnn_cell.MultiRNNCell([cell] * args['num_layers'],
                                                 state_is_tuple=True)

        if (self.train and args['keep_prob'] < 1):  # training mode
            cell_multi = tf.nn.rnn_cell.DropoutWrapper(
                cell_multi, output_keep_prob=args['keep_prob'])

        #define placeholders for input, output and states
        self.input_data = tf.placeholder(
            dtype=tf.float32, shape=[None, self.tsteps, self.input_vec_dim])
        self.target_data = tf.placeholder(
            dtype=tf.float32, shape=[None, self.tsteps, self.output_vec_dim])

        self.istate = cell_multi.zero_state(batch_size=self.batch_size,
                                            dtype=tf.float32)

        #slice the input volume into separate vols for each tstep
        # (old tf.split argument order: split_dim, num_split, value)
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, self.tsteps, self.input_data)
        ]
        self.inputs = inputs
        outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs,
                                                        self.istate,
                                                        cell_multi,
                                                        loop_function=None,
                                                        scope='rnnlm')
        self.outputs = outputs

        #%% Mixture Density Network. Dense layer to predict the MDN params
        # params = evt, eos + 6 parameters per Gaussian
        n_out = self.evt_vec_len + 1 + self.nmixtures * 6
        with tf.variable_scope('mdn_dense'):
            mdn_w = tf.get_variable("output_w", [self.rnn_size, n_out],
                                    initializer=self.graves_initializer)
            mdn_b = tf.get_variable("output_b", [n_out],
                                    initializer=self.graves_initializer)

        #concat outputs for efficiency
        output = tf.reshape(tf.concat(1, outputs), [-1, args['rnn_size']])
        output = tf.nn.xw_plus_b(output, mdn_w,
                                 mdn_b)  #data flows through dense nn
        self.final_state = last_state
        self.output = output

        #build mixture density cap on top of second recurrent cell
        def gaussian2d(x1, x2, mu1, mu2, s1, s2, rho):
            # define gaussian mdn (eq 24, 25 from http://arxiv.org/abs/1308.0850)
            x_mu1 = tf.subtract(x1, mu1)
            x_mu2 = tf.subtract(x2, mu2)
            Z = tf.square(tf.div(x_mu1, s1)) + \
                tf.square(tf.div(x_mu2, s2)) - \
                2*tf.div(tf.multiply(rho, tf.multiply(x_mu1, x_mu2)), tf.multiply(s1, s2))
            rho_square_term = 1 - tf.square(rho)
            power_e = tf.exp(tf.div(-Z, 2 * rho_square_term))
            regularize_term = 2 * np.pi * tf.multiply(tf.multiply(s1, s2),
                                                      tf.sqrt(rho_square_term))
            gaussian = tf.div(power_e, regularize_term)
            return gaussian

        def get_loss(pi, x1_data, x2_data, eos_data, evt_data, mu1, mu2,
                     sigma1, sigma2, rho, eos, evt):
            # define loss function (eq 26 of http://arxiv.org/abs/1308.0850)
            # term1: mixture NLL, term2: end-of-stroke Bernoulli NLL,
            # term3: per-event sigmoid cross-entropy.
            gaussian = gaussian2d(x1_data, x2_data, mu1, mu2, sigma1, sigma2,
                                  rho)
            term1 = tf.multiply(gaussian, pi)
            term1 = tf.reduce_sum(term1, 1,
                                  keep_dims=True)  #do inner summation
            term1 = -tf.log(tf.maximum(
                term1, 1e-20))  # some errors are zero -> numerical errors.

            term2 = tf.multiply(eos, eos_data) + tf.multiply(
                1 - eos, 1 - eos_data)  #modified Bernoulli -> eos probability
            term2 = -tf.log(tf.maximum(term2,
                                       1e-20))  #negative log error gives loss

            # NOTE(review): old positional API — (logits, targets); confirm
            # this matches the installed TF version.
            term3 = tf.nn.sigmoid_cross_entropy_with_logits(evt,
                                                            evt_data,
                                                            name=None)

            return term1, term2, term3

        #transform dense NN outputs into params for MDN
        def get_mdn_coef(Z):
            # returns the tf slices containing mdn dist params (eq 18...23 of http://arxiv.org/abs/1308.0850)
            eos_hat = Z[:, 0:1]  #end of event tokens
            evt_hat = Z[:, 1:self.evt_vec_len + 1]  #evt

            # split remaining columns into 6 equal parameter groups
            pi_hat, mu1_hat, mu2_hat, sigma1_hat, sigma2_hat, rho_hat = tf.split(
                1, 6, Z[:, self.evt_vec_len + 1:])
            self.pi_hat, self.sigma1_hat, self.sigma2_hat = \
                                        pi_hat, sigma1_hat, sigma2_hat # these are useful for bias method during sampling

            eos = tf.sigmoid(1 * eos_hat)
            pi = tf.nn.softmax(pi_hat)  # softmax z_pi:
            mu1 = mu1_hat
            mu2 = mu2_hat  # leave mu1, mu2 as they are
            sigma1 = tf.exp(sigma1_hat)
            sigma2 = tf.exp(sigma2_hat)  # exp for sigmas
            rho = tf.tanh(rho_hat)  # tanh for rho (squish between -1 and 1)

            return [eos, evt_hat, pi, mu1, mu2, sigma1, sigma2, rho]

        #%% get output
        flat_target_data = tf.reshape(self.target_data,
                                      [-1, self.output_vec_dim])
        self.flat_target_data = flat_target_data
        # split targets into x1, x2, eos and the event vector along axis 1
        [x1_data, x2_data, eos_data,
         evt_data] = tf.split_v(flat_target_data, [1, 1, 1, self.evt_vec_len],
                                1)

        [
            self.eos, self.evt, self.pi, self.mu1, self.mu2, self.sigma1,
            self.sigma2, self.rho
        ] = get_mdn_coef(output)

        self.losses = get_loss(self.pi, x1_data, x2_data, eos_data, evt_data, \
                               self.mu1, self.mu2, self.sigma1, self.sigma2, self.rho, \
                               self.eos, self.evt)
        loss = tf.reduce_sum(sum(self.losses))
        self.cost = loss / (self.batch_size * self.tsteps)

        #%%bring together all variables and prepare for training
        self.learning_rate = tf.Variable(0.0, trainable=False)
        self.decay = tf.Variable(0.0, trainable=False)
        self.momentum = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)

        if args['optimizer'] == 'adam':
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)
        elif args['optimizer'] == 'rmsprop':
            self.optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.learning_rate,
                decay=self.decay,
                momentum=self.momentum)
        else:
            raise ValueError("Optimizer type not recognized")
        self.train_op = self.optimizer.apply_gradients(zip(grads, tvars))
Example #19
0
    def _build_model(self):
        """Build the convolutional/fully-connected graph for this model.

        Splits the input placeholder into seven segments (org, 3 low-pass,
        3 secondary-structure), runs a first conv+bn layer on each, concats
        them, then stacks further layers driven by `self.layer_params`, and
        finishes with a softmax output layer.
        """

        # TODO: None batch_size propagation becomes complicated due to reshaping op later on
        ip_size = 4 * self.seq_len + self.ss1_len + self.ss2_len + self.ss3_len
        self.input_layer_x = tf.placeholder(
            tf.float32, (self.batch_size, ip_size, self.ip_channels),
            'input_layer_x')

        # NOTE(review): no split_dim is passed here, so split_v uses its
        # default axis; the placeholder packs the segments along axis 1
        # (ip_size) — confirm the intended axis is actually used.
        org, lp1, lp2, lp3, ss1, ss2, ss3 = tf.split_v(self.input_layer_x, [
            self.seq_len, self.seq_len, self.seq_len, self.seq_len,
            self.ss1_len, self.ss2_len, self.ss3_len
        ])

        key_list = list(self.layer_params.keys())[::-1]
        values_list = list(self.layer_params.values())[::-1]

        # Gather the kernel parameters and perform 1st convolution layer and concantate
        kernel_width, kernel_op_channel, stride, padding = values_list[0]
        kernel_size = [kernel_width, self.ip_channels, kernel_op_channel]

        org_conv_op = conv_bn_layer(org, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_op')
        lp1_conv_op = conv_bn_layer(lp1, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_lp1')
        lp2_conv_op = conv_bn_layer(lp2, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_lp2')
        lp3_conv_op = conv_bn_layer(lp3, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_lp3')
        ss1_conv_op = conv_bn_layer(ss1, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_ss1')
        ss2_conv_op = conv_bn_layer(ss2, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_ss2')
        ss3_conv_op = conv_bn_layer(ss3, kernel_size, stride, padding,
                                    self.weight_reg, self.mode, 'org_conv_ss3')

        concat_conv_layer = tf.concat(1, [
            org_conv_op, lp1_conv_op, lp2_conv_op, lp3_conv_op, ss1_conv_op,
            ss2_conv_op, ss3_conv_op
        ])

        prev_layer = concat_conv_layer

        # iteratively build the layers
        # TODO: Check with tensorboard if this is being done accurately
        for i in range(1, self.num_layers):
            if key_list[i].split('_')[1] == 'conv':

                logging.info("Building conv layer for " + str(i))
                kernel_width, kernel_op_channel, stride, padding = values_list[
                    i]
                kernel_ip_channel = prev_layer.get_shape()[-1]
                kernel_size = [
                    kernel_width, kernel_ip_channel, kernel_op_channel
                ]
                prev_layer = conv_bn_layer(prev_layer, kernel_size, stride,
                                           padding, self.weight_reg, self.mode,
                                           'conv_' + str(i))

            elif key_list[i].split('_')[1] == 'full':

                logging.info("Building full layer for " + str(i))

                # flatten first if the previous layer was not fully connected
                if key_list[i - 1].split('_')[1] != 'full':
                    row, col, channel = prev_layer.get_shape()
                    prev_layer = tf.reshape(prev_layer,
                                            [-1, int(col * channel)])
                    ip_size = col * channel
                    # NOTE(review): op_size is taken from key_list here but
                    # from values_list in the else-branch — confirm which is
                    # intended.
                    op_size = key_list[i]
                    prev_layer = build_full_layer(prev_layer, ip_size, op_size,
                                                  self.weight_reg,
                                                  'full_' + str(i))
                else:
                    op_size = values_list[i]
                    ip_size = prev_layer.get_shape()[-1]
                    prev_layer = build_full_layer(prev_layer, ip_size, op_size,
                                                  self.weight_reg,
                                                  'full_' + str(i))

            # NOTE(review): this branch is unreachable — split('_') tokens
            # never contain '_', so no token can equal 'conv_pool' (such keys
            # are caught by the 'conv' branch above).
            elif key_list[i].split('_')[1] == 'conv_pool':

                logging.info("Building conv_pool layer for " + str(i))
                kernel_width, kernel_op_channel, stride, padding, pool_size = values_list[
                    i]
                kernel_ip_channel = prev_layer.get_shape()[-1]
                kernel_size = [
                    kernel_width, kernel_ip_channel, kernel_op_channel
                ]
                prev_layer = build_cnn_pool_layer(prev_layer, kernel_size,
                                                  stride, padding, pool_size,
                                                  self.weight_reg,
                                                  'conv_pool_' + str(i))

            else:
                raise ValueError("layer specified has not been implemented")

        # need to flatten the output if the final layer is not a fully connected layer
        final_layer = prev_layer
        if key_list[-1].split('_')[1] != 'full':
            row, col, channel = final_layer.get_shape()
            final_layer = tf.reshape(final_layer, [-1, int(col * channel)])

        # softmax output from final layer
        # NOTE(review): xavier_initialization is passed uncalled — likely
        # meant tf.contrib.layers.xavier_initializer(); confirm.
        softmax_w = tf.get_variable(
            'softmax_w',
            [np.prod(final_layer.get_shape()[1:]), self.op_channels],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initialization,
            regularizer=tf.contrib.layers.l2_regularizer(self.weight_reg))
        softmax_b = tf.get_variable('softmax_b', [self.op_channels],
                                    dtype=tf.float32)
        self.output = tf.matmul(final_layer, softmax_w) + softmax_b
        self.output_prob = tf.nn.softmax(self.output)
        activation_summary(self.output_prob)
Example #20
0
File: naf.py Project: scturtle/rl
 def tri(l):
     """Rebuild the triangular matrix from packed entries, exponentiating
     the diagonal block before scattering via the precomputed indices."""
     off_diag, diag_part = tf.split_v(l, [tot - n, n])
     packed = tf.concat_v2([off_diag, tf.exp(diag_part), zero], 0)
     return tf.gather_nd(packed, idx)
Example #21
0
    def loss(self, output, v_memory, e_memory, target, mask, attnij,
             attentionij, attention_maskij, g_val):
        """Compute the loss terms for one cell.

        Args:
            output: tensor(rnn_size)
            v_memory: tensor(slot_size, embedding_size)
            target: tensor(target_size)
            mask: tensor(slot_size, 2)
            attnij: list of tensors [slot_size, Tensor(sentence_length)]
            attentionij: tensor [slot_size, sentence_length]
            attention_maskij: tensor[slot_size, 2]
            g_val: tensor[slot_size, 2]

        Returns:
            (_loss1..._loss5, p, sp, msk): the five loss terms, the da_type
            probabilities, the per-slot value probabilities, and the
            per-slot mask probabilities.
        """
        # _target = [da_type_size, slot1_size, slot2_size, ..., slotn_size]
        _target = tf.split_v(target, self.args.split_sizes, 0)
        da = _target[0]  # da_type vector
        values = _target[1:]  # slot_value vectors

        le = self.args.e_memory_length * self.args.e_memory_size
        lv = self.args.slot_size * self.args.v_memory_size

        # 1st, da_type loss and prob
        _loss1 = 0.0
        # outputx = tf.reshape(output, [1, self.args.rnn_size])
        # vc = tf.reshape(v_memory, [1, self.args.slot_size * self.args.v_memory_size])
        # ov = tf.concat(1, [outputx, vc])
        # vc = tf.reshape(e_memory, [1, self.args.e_memory_length * self.args.e_memory_size])
        ve = tf.reshape(e_memory, [1, le])
        # vv = tf.reshape(v_memory, [1, lv])
        # vc = tf.concat(1, [ve, vv])
        logits = tf.matmul(ve, self.weight_da) + self.bias_da
        _loss1 += tf.nn.softmax_cross_entropy_with_logits(
            tf.squeeze(logits), tf.squeeze(da))
        p = tf.nn.softmax(logits)

        # 2nd, slot_value loss and slot_prob
        _loss2 = 0.0
        sp = []
        for i, item in enumerate(self.args.slots):
            tgt = values[i]
            # kv_concat = tf.concat(0, [self.init_k_memory[i], v_memory[i]])
            logits = tf.matmul(tf.reshape(v_memory[i], [1, self.args.v_memory_size]), self.weight_slot[i]) \
                + self.bias_slot[i]
            # TODO: temporary attention mask
            _loss2 += attention_maskij[i][
                0] * tf.nn.softmax_cross_entropy_with_logits(
                    tf.squeeze(logits), tf.squeeze(tgt))
            sp.append(tf.nn.softmax(logits))

        # 3rd, attention loss
        _loss3 = 0.0
        # split into one attention vector per slot, then squeeze each
        attentionij = tf.split(0, self.args.slot_size, attentionij)
        attentionij = [tf.squeeze(item) for item in attentionij]
        for i in range(self.args.slot_size):
            _loss3 += attention_maskij[i][0] * self.cross_entropy(
                attentionij[i], attnij[i])

        # 4th, mask loss and msk
        _loss4 = 0.0
        msk = []
        mask = tf.split(0, self.args.slot_size, mask)
        for i, item in enumerate(self.args.slots):
            logits = tf.matmul(ve, self.weight_mask[i]) + self.bias_mask[i]
            _loss4 += tf.nn.softmax_cross_entropy_with_logits(
                tf.squeeze(logits), tf.squeeze(mask[i]))
            msk.append(tf.nn.softmax(tf.squeeze(logits)))

        # 5th, g loss
        _loss5 = 0.0
        atm = tf.reshape(attention_maskij, [self.args.slot_size, 2])
        g_val = tf.reshape(g_val, [self.args.slot_size, 2])
        for i, _ in enumerate(self.args.slots):
            # _loss5 += tf.nn.softmax_cross_entropy_with_logits(g_val[i], atm[i])
            _loss5 += self.cross_entropy(atm[i], g_val[i])
        return _loss1, _loss2, _loss3, _loss4, _loss5, p, sp, msk