def _testSpecialCases(self, use_gpu):
    """Check split_v with a single full-size split and with an inferred (-1) split."""
    data = np.random.rand(4, 4).astype("f")
    with self.test_session(use_gpu=use_gpu) as sess:
        # A single split covering the whole axis returns the input unchanged.
        pieces = sess.run(tf.split_v(data, [4], 0))
        self.assertAllEqual(pieces[0], data)
        # A -1 entry is inferred from the remainder: 4 - 3 = 1 row.
        pieces = sess.run(tf.split_v(data, [-1, 3], 0))
        self.assertAllEqual(pieces[0], data[0:1, :])
        self.assertAllEqual(pieces[1], data[1:4, :])
def testExplicitNum(self):
    """An all-unknown size_splits placeholder requires an explicit num=."""
    splits_ph = tf.placeholder(dtype=tf.int32, shape=[None])
    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    feed = {splits_ph: [2, 2, 6]}
    with self.test_session(use_gpu=False) as sess:
        # Without num=, the number of outputs cannot be inferred at
        # graph-construction time, so split_v must raise.
        with self.assertRaises(ValueError) as context:
            sess.run(tf.split_v(data, splits_ph), feed)
        self.assertTrue(
            "Cannot infer num from shape" in str(context.exception))
        # Supplying num=3 makes the very same feed succeed.
        parts = sess.run(tf.split_v(data, splits_ph, num=3), feed)
        self.assertAllEqual(parts[0], data[0:2])
        self.assertAllEqual(parts[1], data[2:4])
        self.assertAllEqual(parts[2], data[4:])
def testExplicitNum(self):
    """split_v cannot infer the output count from a fully-dynamic
    size_splits placeholder; passing num= explicitly fixes it."""
    size_splits = tf.placeholder(dtype=tf.int32, shape=[None])
    value = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    with self.test_session(use_gpu=False) as sess:
        # Without num=, graph construction must fail with ValueError.
        with self.assertRaises(ValueError) as context:
            sess.run(tf.split_v(value, size_splits), {size_splits: [2, 2, 6]})
        self.assertTrue(
            "Cannot infer num from shape" in str(context.exception))
        # With num=3 the identical feed succeeds and yields three pieces.
        result = sess.run(tf.split_v(value, size_splits, num=3),
                          {size_splits: [2, 2, 6]})
        self.assertAllEqual(result[0], value[0:2])
        self.assertAllEqual(result[1], value[2:4])
        self.assertAllEqual(result[2], value[4:])
def testListOfScalarTensors(self):
    """size_splits may be given as a Python list of scalar int32 tensors."""
    first = tf.to_int32(5)
    second = tf.to_int32(6)
    data = np.random.rand(11, 11)
    with self.test_session(use_gpu=False) as sess:
        pieces = sess.run(tf.split_v(data, [first, second]))
        # 5 + 6 rows along the default axis 0.
        self.assertAllEqual(pieces[0], data[0:5, :])
        self.assertAllEqual(pieces[1], data[5:, :])
def _testHugeNumberOfTensors(self, use_gpu):
    """Stress split_v with 10000 outputs of random width 1 or 2."""
    num_split = 10000
    sizes = np.random.randint(1, 3, num_split)
    shape = [3, np.sum(sizes)]
    axis = 1
    data = np.random.rand(*shape).astype("f")
    with self.test_session(use_gpu=use_gpu) as sess:
        outputs = sess.run(tf.split_v(data, sizes, axis))
        # Walk the split axis and compare each piece to its slice.
        window = [slice(0, d) for d in shape]
        begin = 0
        for piece, size in zip(outputs, sizes):
            window[axis] = slice(begin, begin + size)
            begin += size
            self.assertAllEqual(piece, data[window])
def _testGradientsSimple(self, use_gpu):
    """Gradients of split_v route each output grad back to its input slice.

    Bug fix: size_splits was [1, 4], which sums to 5 on a 4-wide axis;
    the gradient shapes (4, 1) / (4, 3) and the assertions on columns
    0:1 and 1:4 show [1, 3] was intended.
    """
    inp = np.random.rand(4, 4).astype("f")
    with self.test_session(use_gpu=use_gpu):
        inp_tensor = tf.convert_to_tensor(inp)
        # Split the 4 columns into pieces of width 1 and 3.
        s = tf.split_v(inp_tensor, [1, 3], 1)
        inp_grads = [
            np.random.rand(4, 1).astype("f"),
            np.random.rand(4, 3).astype("f")
        ]
        grad_tensors = [tf.constant(x) for x in inp_grads]
        grad = tf.gradients(s, [inp_tensor], grad_tensors)[-1]
        result = grad.eval()
        # The input gradient is the column-wise concatenation of the
        # per-output gradients.
        self.assertAllEqual(result[:, 0:1], inp_grads[0])
        self.assertAllEqual(result[:, 1:4], inp_grads[1])
def unigaussian_loss(y_true, y_pred):
    """Negative log-likelihood loss for a mixture of univariate Gaussians.

    Assumes y_pred packs, along axis 1, num_mix * output_dim means,
    num_mix sigmas and num_mix mixture weights (pi), in that order —
    TODO confirm against the layer producing y_pred.
    """
    mix = tf.range(start=0, limit=self.num_mix)
    # Split the network output into mean / sigma / mixture-weight blocks.
    out_mu, out_sigma, out_pi = tf.split_v(
        split_dim=1,
        size_splits=[
            self.num_mix * self.output_dim, self.num_mix, self.num_mix
        ],
        value=y_pred,
        name='mdn_coef_split')

    # tf.to_float(out_mu)
    # print('----- ', tf.shape(y_pred)[0].eval(session=K.get_session()))
    # print('----- ', tf.shape(y_pred)[1])

    def loss_i(i):
        # Weighted likelihood contribution of mixture component i.
        batch_size = tf.shape(out_sigma)[0]
        sigma_i = tf.slice(out_sigma, [0, i], [batch_size, 1],
                           name='mdn_sigma_slice')
        pi_i = tf.slice(out_pi, [0, i], [batch_size, 1],
                        name='mdn_pi_slice')
        mu_i = tf.slice(out_mu, [0, i * self.output_dim],
                        [batch_size, self.output_dim],
                        name='mdn_mu_slice')
        # NOTE(review): leftover debug output; tf.Print result is unused.
        print('***.....>> ', i * self.output_dim)
        tf.Print(mu_i, [i], ">>>>>>> ")
        # print('.....>> ', tf.shape(y_true))
        # Component density at the target, scaled by its mixture weight.
        dist = tf.contrib.distributions.Normal(mu=mu_i, sigma=sigma_i)
        loss = dist.pdf(y_true)
        # loss = gaussian_kernel_(y_true, mu_i, sigma_i)
        loss = pi_i * loss
        return loss

    # Sum weighted component densities over the mixture, then take -log
    # for the negative log-likelihood, averaged over the batch.
    result = tf.map_fn(lambda m: loss_i(m),
                       mix,
                       dtype=tf.float32,
                       name='mix_map_fn')
    result = tf.reduce_sum(result, axis=0, keep_dims=False)
    result = -tf.log(result)
    # result = tf.reduce_mean(result, axis=1)
    result = tf.reduce_mean(result)
    # result = tf.reduce_sum(result)
    return result
def _RunAndVerifyScalar(self, use_gpu, large_num_splits=False):
    """Feed split_v a scalar num_split and verify each equal-sized slice."""
    shape = np.random.randint(0, 5, size=5)
    split_dim = np.random.randint(0, 5)
    num_split = (np.random.randint(16, 25)
                 if large_num_splits else np.random.randint(2, 8))
    # Make the split axis an exact multiple of num_split.
    shape[split_dim] = np.random.randint(2, 5) * num_split
    inp = np.random.rand(*shape).astype("f")
    with self.test_session(use_gpu=use_gpu) as sess:
        outputs = sess.run(tf.split_v(inp, num_split, split_dim))
        piece = shape[split_dim] // num_split
        window = [slice(0, d) for d in shape]
        for idx, start in enumerate(range(0, shape[split_dim], piece)):
            window[split_dim] = slice(start, start + piece)
            self.assertAllEqual(outputs[idx], inp[window])
def _RunAndVerify(self, use_gpu, large_num_splits=False):
    """Split a random rank-5 tensor with random piece sizes and verify."""
    # Random dims of rank 5
    dims = np.random.randint(1, 5, size=5)
    axis = np.random.randint(0, 5)
    if large_num_splits:
        num_pieces = np.random.randint(16, 25)
    else:
        num_pieces = np.random.randint(2, 8)
    sizes = np.random.randint(2, 8, num_pieces)
    # The split axis must hold exactly the sum of the piece sizes.
    dims[axis] = np.sum(sizes)
    data = np.random.rand(*dims).astype("f")
    with self.test_session(use_gpu=use_gpu) as sess:
        outputs = sess.run(tf.split_v(data, sizes, axis))
        window = [slice(0, d) for d in dims]
        start = 0
        for out, size in zip(outputs, sizes):
            window[axis] = slice(start, start + size)
            start += size
            self.assertAllEqual(out, data[window])
def build_graph(device, input_shape, output_sizes, axis):
    """Build a graph containing a sequence of split operations.

    (Docstring fix: the previous text said "batch normalizations" and
    claimed an array of tensors was returned; the code builds tf.split_v
    ops and returns a tf.group op.)

    Args:
        device: string, the device to run on.
        input_shape: shape of the input tensor.
        output_sizes: size of each output along axis.
        axis: axis to be split along.

    Returns:
        A grouped op depending on all split outputs, suitable for run().
    """
    with tf.device("/%s:0" % device):
        inp = tf.zeros(input_shape)
        outputs = []
        # Repeat the split many times so a benchmark measures op overhead.
        for _ in range(100):
            outputs.extend(tf.split_v(inp, output_sizes, axis))
        return tf.group(*outputs)
activation_fn=None) if tf.VERSION == '1.3.0': outputs = tf.nn.sigmoid(logit) elif tf.VERSION == '0.12.1': #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/ outputs = tf.sigmoid(logit) return outputs pred = dynamicRNN(x) if tf.VERSION == '1.3.0': pred_qual, pred_ccssm = tf.split(value=pred, num_or_size_splits=[612, 4], axis=1) elif tf.VERSION == '0.12.1': #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/ pred_qual, pred_ccssm = tf.split_v(value=pred, size_splits=[612, 4], split_dim=1) # Define loss and optimizer if tf.VERSION == '1.3.0': cost = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y)) elif tf.VERSION == '0.12.1': #summit's tensorflow version API doc: https://www.tensorflow.org/versions/r0.12/api_docs/ cost = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, targets=y)) #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(cost) optimizer = tf.train.AdagradOptimizer( learning_rate=learning_rate).minimize(cost) # Evaluate model - use AUC to evaluate model if tf.VERSION == '1.3.0':
def __init__(self, args):
    """Build the MDN-RNN graph for event/coordinate sequences.

    Trains (mode == 'train') with full batch/sequence length, or samples
    one step at a time otherwise. The recurrent stack feeds a dense layer
    whose output parameterizes a 2-D Gaussian mixture plus end-of-stroke
    and event heads (following http://arxiv.org/abs/1308.0850).

    Args:
        args: dict of hyper-parameters: 'rnn_size', 'mode', 'nmixtures',
            'batch_size', 'seq_len', 'grad_clip', 'events', 'keep_prob',
            'num_layers', 'optimizer'.
    """
    #%% model params
    self.rnn_size = args['rnn_size']
    self.train = True if args['mode'] == 'train' else False
    self.nmixtures = args['nmixtures']
    self.batch_size = args[
        'batch_size'] if self.train else 1  # training/sampling specific
    self.tsteps = args[
        'seq_len'] if self.train else 1  # training/sampling specific
    # training params
    self.grad_clip = args['grad_clip']
    # other
    self.evt_vec_len = len(args['events'])
    # Initializers per Graves (http://arxiv.org/abs/1308.0850).
    self.graves_initializer = tf.truncated_normal_initializer(
        mean=0., stddev=.075, seed=None, dtype=tf.float32)
    self.window_b_initializer = tf.truncated_normal_initializer(
        mean=-3.0, stddev=.25, seed=None, dtype=tf.float32)

    input_shape = 4  #(x, y), eos, mode
    self.input_vec_dim = input_shape
    self.output_vec_dim = 3 + self.evt_vec_len  #(x, y), eos, evt

    #%% build the basic recurrent network architecture
    cell_func = tf.nn.rnn_cell.LSTMCell  # could be GRUCell or RNNCell
    cell = cell_func(args['rnn_size'])
    if (self.train and args['keep_prob'] < 1):  # training mode
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=args['keep_prob'])
    cell_multi = tf.nn.rnn_cell.MultiRNNCell([cell] * args['num_layers'],
                                             state_is_tuple=True)
    if (self.train and args['keep_prob'] < 1):  # training mode
        cell_multi = tf.nn.rnn_cell.DropoutWrapper(
            cell_multi, output_keep_prob=args['keep_prob'])

    #define placeholders for input, output and states
    self.input_data = tf.placeholder(
        dtype=tf.float32, shape=[None, self.tsteps, self.input_vec_dim])
    self.target_data = tf.placeholder(
        dtype=tf.float32, shape=[None, self.tsteps, self.output_vec_dim])
    self.istate = cell_multi.zero_state(batch_size=self.batch_size,
                                        dtype=tf.float32)

    #slice the input volume into separate vols for each tstep
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, self.tsteps, self.input_data)
    ]
    self.inputs = inputs

    outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs,
                                                    self.istate,
                                                    cell_multi,
                                                    loop_function=None,
                                                    scope='rnnlm')
    self.outputs = outputs

    #%% Mixture Density Network. Dense layer to predict the MDN params
    # params = evt, eos + 6 parameters per Gaussian
    n_out = self.evt_vec_len + 1 + self.nmixtures * 6
    with tf.variable_scope('mdn_dense'):
        mdn_w = tf.get_variable("output_w", [self.rnn_size, n_out],
                                initializer=self.graves_initializer)
        mdn_b = tf.get_variable("output_b", [n_out],
                                initializer=self.graves_initializer)

    #concat outputs for efficiency
    output = tf.reshape(tf.concat(1, outputs), [-1, args['rnn_size']])
    output = tf.nn.xw_plus_b(output, mdn_w,
                             mdn_b)  #data flows through dense nn
    self.final_state = last_state
    self.output = output

    #build mixture density cap on top of second recurrent cell
    def gaussian2d(x1, x2, mu1, mu2, s1, s2, rho):
        # define gaussian mdn (eq 24, 25 from http://arxiv.org/abs/1308.0850)
        x_mu1 = tf.subtract(x1, mu1)
        x_mu2 = tf.subtract(x2, mu2)
        Z = tf.square(tf.div(x_mu1, s1)) + \
            tf.square(tf.div(x_mu2, s2)) - \
            2*tf.div(tf.multiply(rho, tf.multiply(x_mu1, x_mu2)), tf.multiply(s1, s2))
        rho_square_term = 1 - tf.square(rho)
        power_e = tf.exp(tf.div(-Z, 2 * rho_square_term))
        regularize_term = 2 * np.pi * tf.multiply(
            tf.multiply(s1, s2), tf.sqrt(rho_square_term))
        gaussian = tf.div(power_e, regularize_term)
        return gaussian

    def get_loss(pi, x1_data, x2_data, eos_data, evt_data, mu1, mu2,
                 sigma1, sigma2, rho, eos, evt):
        # define loss function (eq 26 of http://arxiv.org/abs/1308.0850)
        gaussian = gaussian2d(x1_data, x2_data, mu1, mu2, sigma1, sigma2,
                              rho)
        term1 = tf.multiply(gaussian, pi)
        term1 = tf.reduce_sum(term1, 1,
                              keep_dims=True)  #do inner summation
        term1 = -tf.log(tf.maximum(
            term1, 1e-20))  # some errors are zero -> numerical errors.
        term2 = tf.multiply(eos, eos_data) + tf.multiply(
            1 - eos, 1 - eos_data)  #modified Bernoulli -> eos probability
        term2 = -tf.log(tf.maximum(term2,
                                   1e-20))  #negative log error gives loss
        # Old positional TF API: (logits, targets); evt are raw logits.
        term3 = tf.nn.sigmoid_cross_entropy_with_logits(evt, evt_data,
                                                        name=None)
        return term1, term2, term3

    #transform dense NN outputs into params for MDN
    def get_mdn_coef(Z):
        # returns the tf slices containing mdn dist params
        # (eq 18...23 of http://arxiv.org/abs/1308.0850)
        eos_hat = Z[:, 0:1]  #end of event tokens
        evt_hat = Z[:, 1:self.evt_vec_len + 1]  #evt
        pi_hat, mu1_hat, mu2_hat, sigma1_hat, sigma2_hat, rho_hat = tf.split(
            1, 6, Z[:, self.evt_vec_len + 1:])
        self.pi_hat, self.sigma1_hat, self.sigma2_hat = \
            pi_hat, sigma1_hat, sigma2_hat  # these are useful for bias method during sampling

        eos = tf.sigmoid(1 * eos_hat)
        pi = tf.nn.softmax(pi_hat)  # softmax z_pi:
        mu1 = mu1_hat
        mu2 = mu2_hat  # leave mu1, mu2 as they are
        sigma1 = tf.exp(sigma1_hat)
        sigma2 = tf.exp(sigma2_hat)  # exp for sigmas
        rho = tf.tanh(rho_hat)  # tanh for rho (squish between -1 and 1)
        return [eos, evt_hat, pi, mu1, mu2, sigma1, sigma2, rho]

    #%% get output
    flat_target_data = tf.reshape(self.target_data,
                                  [-1, self.output_vec_dim])
    self.flat_target_data = flat_target_data
    # Target columns: x1, x2, eos flag, then the event vector of
    # width evt_vec_len (sums to output_vec_dim = 3 + evt_vec_len).
    [x1_data, x2_data, eos_data, evt_data] = tf.split_v(
        flat_target_data, [1, 1, 1, self.evt_vec_len], 1)
    [
        self.eos, self.evt, self.pi, self.mu1, self.mu2, self.sigma1,
        self.sigma2, self.rho
    ] = get_mdn_coef(output)

    self.losses = get_loss(self.pi, x1_data, x2_data, eos_data, evt_data, \
                           self.mu1, self.mu2, self.sigma1, self.sigma2, self.rho, \
                           self.eos, self.evt)
    loss = tf.reduce_sum(sum(self.losses))
    self.cost = loss / (self.batch_size * self.tsteps)

    #%%bring together all variables and prepare for training
    self.learning_rate = tf.Variable(0.0, trainable=False)
    self.decay = tf.Variable(0.0, trainable=False)
    self.momentum = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Clip global gradient norm to grad_clip before applying updates.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      self.grad_clip)
    if args['optimizer'] == 'adam':
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate)
    elif args['optimizer'] == 'rmsprop':
        self.optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate,
            decay=self.decay,
            momentum=self.momentum)
    else:
        raise ValueError("Optimizer type not recognized")
    self.train_op = self.optimizer.apply_gradients(zip(grads, tvars))
def _build_model(self):
    """Build the multi-branch 1-D conv/FC network and its softmax head.

    The input placeholder packs, along axis 1, four length-seq_len signal
    branches (org, lp1..lp3) plus three branches of lengths ss1..ss3.
    Each branch gets its own first conv layer; the outputs are
    concatenated and run through the layers in self.layer_params.

    Fixes:
      * tf.split_v lacked split_dim=1 — the size_splits sum to ip_size
        (axis 1 of the placeholder) but the default split_dim is 0, the
        batch axis.
      * Layer-type dispatch used key.split('_')[1], which can never equal
        'conv_pool' ('i_conv_pool'.split('_')[1] == 'conv'), so conv_pool
        entries hit the conv branch and crashed unpacking 5 params;
        split('_', 1)[1] keeps the full type suffix.
      * tf.contrib.layers.xavier_initialization does not exist; the
        correct factory is tf.contrib.layers.xavier_initializer().
    """
    # TODO: None batch_size propagation becomes complicated due to
    # reshaping op later on
    ip_size = 4 * self.seq_len + self.ss1_len + self.ss2_len + self.ss3_len
    self.input_layer_x = tf.placeholder(
        tf.float32, (self.batch_size, ip_size, self.ip_channels),
        'input_layer_x')
    # Split the feature axis (axis 1) into the seven signal branches.
    org, lp1, lp2, lp3, ss1, ss2, ss3 = tf.split_v(self.input_layer_x, [
        self.seq_len, self.seq_len, self.seq_len, self.seq_len,
        self.ss1_len, self.ss2_len, self.ss3_len
    ], 1)

    key_list = list(self.layer_params.keys())[::-1]
    values_list = list(self.layer_params.values())[::-1]

    # Gather the kernel parameters, apply the first convolution layer to
    # each branch, and concatenate the branch outputs along axis 1.
    kernel_width, kernel_op_channel, stride, padding = values_list[0]
    kernel_size = [kernel_width, self.ip_channels, kernel_op_channel]
    org_conv_op = conv_bn_layer(org, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_op')
    lp1_conv_op = conv_bn_layer(lp1, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_lp1')
    lp2_conv_op = conv_bn_layer(lp2, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_lp2')
    lp3_conv_op = conv_bn_layer(lp3, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_lp3')
    ss1_conv_op = conv_bn_layer(ss1, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_ss1')
    ss2_conv_op = conv_bn_layer(ss2, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_ss2')
    ss3_conv_op = conv_bn_layer(ss3, kernel_size, stride, padding,
                                self.weight_reg, self.mode, 'org_conv_ss3')
    concat_conv_layer = tf.concat(1, [
        org_conv_op, lp1_conv_op, lp2_conv_op, lp3_conv_op, ss1_conv_op,
        ss2_conv_op, ss3_conv_op
    ])
    prev_layer = concat_conv_layer

    # iteratively build the layers
    # TODO: Check with tensorboard if this is being done accurately
    for i in range(1, self.num_layers):
        # Keys look like '<index>_<type>'; keep the whole type suffix.
        layer_type = key_list[i].split('_', 1)[1]
        if layer_type == 'conv':
            logging.info("Building conv layer for " + str(i))
            kernel_width, kernel_op_channel, stride, padding = values_list[
                i]
            kernel_ip_channel = prev_layer.get_shape()[-1]
            kernel_size = [
                kernel_width, kernel_ip_channel, kernel_op_channel
            ]
            prev_layer = conv_bn_layer(prev_layer, kernel_size, stride,
                                       padding, self.weight_reg, self.mode,
                                       'conv_' + str(i))
        elif layer_type == 'full':
            logging.info("Building full layer for " + str(i))
            if key_list[i - 1].split('_', 1)[1] != 'full':
                # First fully-connected layer: flatten (batch, col, chan).
                row, col, channel = prev_layer.get_shape()
                prev_layer = tf.reshape(prev_layer,
                                        [-1, int(col * channel)])
                ip_size = col * channel
                # NOTE(review): passing the key string as op_size looks
                # wrong (values_list[i] expected, as in the else branch);
                # preserved as-is pending confirmation.
                op_size = key_list[i]
                prev_layer = build_full_layer(prev_layer, ip_size, op_size,
                                              self.weight_reg,
                                              'full_' + str(i))
            else:
                op_size = values_list[i]
                ip_size = prev_layer.get_shape()[-1]
                prev_layer = build_full_layer(prev_layer, ip_size, op_size,
                                              self.weight_reg,
                                              'full_' + str(i))
        elif layer_type == 'conv_pool':
            logging.info("Building conv_pool layer for " + str(i))
            (kernel_width, kernel_op_channel, stride, padding,
             pool_size) = values_list[i]
            kernel_ip_channel = prev_layer.get_shape()[-1]
            kernel_size = [
                kernel_width, kernel_ip_channel, kernel_op_channel
            ]
            prev_layer = build_cnn_pool_layer(prev_layer, kernel_size,
                                              stride, padding, pool_size,
                                              self.weight_reg,
                                              'conv_pool_' + str(i))
        else:
            raise ValueError("layer specified has not been implemented")

    # need to flatten the output if the final layer is not a fully
    # connected layer
    final_layer = prev_layer
    if key_list[-1].split('_', 1)[1] != 'full':
        row, col, channel = final_layer.get_shape()
        final_layer = tf.reshape(final_layer, [-1, int(col * channel)])

    # softmax output from final layer
    softmax_w = tf.get_variable(
        'softmax_w',
        [np.prod(final_layer.get_shape()[1:]), self.op_channels],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(),
        regularizer=tf.contrib.layers.l2_regularizer(self.weight_reg))
    softmax_b = tf.get_variable('softmax_b', [self.op_channels],
                                dtype=tf.float32)
    self.output = tf.matmul(final_layer, softmax_w) + softmax_b
    self.output_prob = tf.nn.softmax(self.output)
    activation_summary(self.output_prob)
def tri(l):
    """Scatter the packed vector l into a matrix via idx.

    The first (tot - n) entries are used as-is; the last n are
    exponentiated (presumably log-scale diagonal entries, keeping them
    positive — confirm against how idx is built) before a zero block is
    appended and the result gathered into shape.
    """
    off_diag, log_diag = tf.split_v(l, [tot - n, n])
    padded = tf.concat_v2([off_diag, tf.exp(log_diag), zero], 0)
    return tf.gather_nd(padded, idx)
def loss(self, output, v_memory, e_memory, target, mask, attnij,
         attentionij, attention_maskij, g_val):
    """Compute the five loss terms and predictions for one cell step.

    Args:
        output: tensor(rnn_size) — cell output (unused in the active
            code path; kept for the commented-out variants below).
        v_memory: tensor(slot_size, v_memory_size) value memory.
        e_memory: tensor(e_memory_length, e_memory_size) memory.
        target: tensor(target_size) concatenation of
            [da_type, slot1_value, ..., slotn_value] target vectors.
        mask: tensor(slot_size, 2) per-slot mask targets.
        attnij: list of slot_size tensors(sentence_length) — attention
            targets.
        attentionij: tensor(slot_size, sentence_length) predicted
            attention.
        attention_maskij: tensor(slot_size, 2) attention mask.
        g_val: tensor(slot_size, 2) predicted gate values.

    Returns:
        (_loss1, _loss2, _loss3, _loss4, _loss5, p, sp, msk):
        dialogue-act, slot-value, attention, mask and gate losses, the
        da probability p, per-slot value probabilities sp, and mask
        probabilities msk.
    """
    # _target = [da_type_size, slot1_size, slot2_size, ..., slotn_size]
    _target = tf.split_v(target, self.args.split_sizes, 0)
    da = _target[0]  # da_type vector
    values = _target[1:]  # slot_value vectors
    le = self.args.e_memory_length * self.args.e_memory_size
    lv = self.args.slot_size * self.args.v_memory_size

    # 1st, da_type loss and prob — logits come from flattened e_memory.
    _loss1 = 0.0
    # outputx = tf.reshape(output, [1, self.args.rnn_size])
    # vc = tf.reshape(v_memory, [1, self.args.slot_size * self.args.v_memory_size])
    # ov = tf.concat(1, [outputx, vc])
    # vc = tf.reshape(e_memory, [1, self.args.e_memory_length * self.args.e_memory_size])
    ve = tf.reshape(e_memory, [1, le])
    # vv = tf.reshape(v_memory, [1, lv])
    # vc = tf.concat(1, [ve, vv])
    logits = tf.matmul(ve, self.weight_da) + self.bias_da
    _loss1 += tf.nn.softmax_cross_entropy_with_logits(
        tf.squeeze(logits), tf.squeeze(da))
    p = tf.nn.softmax(logits)

    # 2nd, slot_value loss and slot_prob — one head per slot, weighted
    # by the first attention-mask component.
    _loss2 = 0.0
    sp = []
    for i, item in enumerate(self.args.slots):
        tgt = values[i]
        # kv_concat = tf.concat(0, [self.init_k_memory[i], v_memory[i]])
        logits = tf.matmul(
            tf.reshape(v_memory[i], [1, self.args.v_memory_size]),
            self.weight_slot[i]) + self.bias_slot[i]
        # TODO: temporary attention mask
        _loss2 += attention_maskij[i][
            0] * tf.nn.softmax_cross_entropy_with_logits(
                tf.squeeze(logits), tf.squeeze(tgt))
        sp.append(tf.nn.softmax(logits))

    # 3rd, attention loss — cross-entropy of predicted vs target
    # attention per slot, again masked.
    _loss3 = 0.0
    attentionij = tf.split(0, self.args.slot_size, attentionij)
    attentionij = [tf.squeeze(item) for item in attentionij]
    for i in range(self.args.slot_size):
        _loss3 += attention_maskij[i][0] * self.cross_entropy(
            attentionij[i], attnij[i])

    # 4th, mask loss and msk — per-slot mask heads on the same ve.
    _loss4 = 0.0
    msk = []
    mask = tf.split(0, self.args.slot_size, mask)
    for i, item in enumerate(self.args.slots):
        logits = tf.matmul(ve, self.weight_mask[i]) + self.bias_mask[i]
        _loss4 += tf.nn.softmax_cross_entropy_with_logits(
            tf.squeeze(logits), tf.squeeze(mask[i]))
        msk.append(tf.nn.softmax(tf.squeeze(logits)))

    # 5th, g loss — gate values against the attention mask.
    _loss5 = 0.0
    atm = tf.reshape(attention_maskij, [self.args.slot_size, 2])
    g_val = tf.reshape(g_val, [self.args.slot_size, 2])
    for i, _ in enumerate(self.args.slots):
        # _loss5 += tf.nn.softmax_cross_entropy_with_logits(g_val[i], atm[i])
        _loss5 += self.cross_entropy(atm[i], g_val[i])

    return _loss1, _loss2, _loss3, _loss4, _loss5, p, sp, msk