def sg_quasi_rnn(tensor, opt):
    """Apply quasi-RNN fo-pooling across the time axis of ``tensor``.

    ``tensor`` carries the gate tensors stacked along axis 0: three equal
    parts (Z, F, O), or four (H, Z, F, O) when ``opt.att`` is set, in which
    case the leading part H is the memory attended over at every step.
    The shape notes of the original code give each part as (16, 150, 320).

    Args:
        tensor: stacked gate tensor; its static size along axis 1 is used
            as the number of time steps.
        opt: options object. ``opt.att`` switches attention on and
            ``opt.is_enc`` selects the encoder-style return value.

    Returns:
        The per-step hidden states concatenated along axis 1. When
        ``opt.is_enc`` is set, three linear projections of the last
        non-zero hidden state, each tiled over time, are stacked after
        that tensor along axis 0.
    """
    # Undo the stacking along axis 0.
    if opt.att:
        memory, z_all, f_all, o_all = tf.split(tensor, 4, axis=0)
    else:
        memory = None  # only read by the attention branch
        z_all, f_all, o_all = tf.split(tensor, 3, axis=0)

    timesteps = tensor.get_shape().as_list()[1]

    cell = 0  # broadcasts against the first forget gate
    hidden_steps = []
    for t in range(timesteps):
        z_t = z_all[:, t, :]
        f_t = f_all[:, t, :]
        o_t = o_all[:, t, :]

        # fo-pooling: blend the previous cell memory with the new candidate.
        cell = f_t * cell + (1 - f_t) * z_t

        if opt.att:
            # Attention weights over the memory, then the weighted sum of
            # the memory along its time axis.
            alpha = tf.nn.softmax(tf.einsum("ijk,ik->ij", memory, cell))
            context = (alpha.sg_expand_dims() * memory).sg_sum(axis=1)
            hidden = o_t * (context.sg_dense(act="linear") +
                            cell.sg_dense(act="linear"))
        else:
            hidden = o_t * cell

        hidden_steps.append(hidden.sg_expand_dims(axis=1))

    outputs = tf.concat(hidden_steps, 1)

    # Number of time steps whose hidden vector does not sum to zero (int32).
    nonzero_steps = tf.sign(tf.abs(tf.reduce_sum(outputs, axis=-1)))
    seqlen = tf.to_int32(tf.reduce_sum(nonzero_steps, 1))
    # Reversing each sequence over its own length moves its last counted
    # step to index 0.
    last_hidden = tf.reverse_sequence(
        input=outputs, seq_lengths=seqlen, seq_dim=1)[:, 0, :]

    if not opt.is_enc:
        return outputs

    # Three separate linear projections of the last state, repeated over
    # time (these were named H_z, H_f and H_o in the original).
    tiled = []
    for _ in range(3):
        projected = last_hidden.sg_dense(act="linear")
        tiled.append(
            tf.tile(projected.sg_expand_dims(axis=1), [1, timesteps, 1]))

    return tf.concat([outputs] + tiled, 0)
def sg_quasi_rnn(tensor, opt):
    """Run quasi-RNN fo-pooling over ``tensor`` one time step at a time.

    ``tensor`` is split along axis 0 into (Z, F, O), or into (H, Z, F, O)
    when ``opt.att`` is set; H is then the memory used by the attention.
    The shape notes of the original code give each part as (16, 150, 320).

    Args:
        tensor: stacked gate tensor; the static size of axis 1 is the
            number of time steps.
        opt: options object providing the ``att`` and ``is_enc`` flags.

    Returns:
        The hidden states of all steps concatenated along axis 1. With
        ``opt.is_enc`` set, three linear projections of the hidden state
        of the final loop step, tiled over time, are stacked after it
        along axis 0.
    """
    n_parts = 4 if opt.att else 3
    parts = tf.split(axis=0, num_or_size_splits=n_parts, value=tensor)
    memory = parts[0] if opt.att else None  # only read when opt.att is set
    z_all, f_all, o_all = parts[-3:]

    def pool(z_t, f_t, o_t, cell):
        """Return (hidden, new cell memory) for a single time step."""
        cell = f_t * cell + (1 - f_t) * z_t
        if not opt.att:
            return o_t * cell, cell
        # Attention weights over the memory and the resulting weighted sum
        # over its time axis, i.e. one vector per batch row.
        alpha = tf.nn.softmax(tf.einsum("ijk,ik->ij", memory, cell))
        context = (alpha.sg_expand_dims() * memory).sg_sum(dims=1)
        hidden = o_t * (context.sg_dense(act="linear") +
                        cell.sg_dense(act="linear"))
        return hidden, cell

    timesteps = tensor.get_shape().as_list()[1]
    cell = 0
    per_step = []
    for t in range(timesteps):
        hidden, cell = pool(z_all[:, t, :], f_all[:, t, :], o_all[:, t, :],
                            cell)
        per_step.append(hidden.sg_expand_dims(dim=1))

    outputs = tf.concat(axis=1, values=per_step)
    if not opt.is_enc:
        return outputs

    # ``hidden`` still holds the state of the final loop iteration.
    tiled = []
    for _ in range(3):
        projected = hidden.sg_dense(act="linear")
        tiled.append(
            tf.tile(projected.sg_expand_dims(dim=1), [1, timesteps, 1]))

    return tf.concat(axis=0, values=[outputs] + tiled)
def __call__(self, tensor, state, scope=None):
    """Advance the LSTM cell by one step.

    Args:
        tensor: input for this step.
        state: pair ``(prev_c, prev_h)`` of previous cell and hidden state.
        scope: accepted for interface compatibility; not used here.

    Returns:
        ``(new_c, new_h)``, the updated cell state and hidden state.
    """
    prev_c, prev_h = state

    # A single projection of [input, previous hidden] is cut into the four
    # gate pre-activations: input gate, candidate cell value, forget gate
    # and output gate.
    gates = self._linear([tensor, prev_h])
    in_gate, candidate, forget_gate, out_gate = tf.split(
        value=gates, num_or_size_splits=4, axis=1)

    if self._ln:
        # Layer-norm parameters are looked up (not created) in "ln_rnn".
        with tf.variable_scope("ln_rnn", reuse=True):
            beta = tf.get_variable('beta')
            gamma = tf.get_variable('gamma')

        def normalize(v):
            return _ln_rnn(v, gamma, beta)
    else:
        def normalize(v):
            return v

    kept = prev_c * tf.sigmoid(normalize(forget_gate))
    written = (tf.sigmoid(normalize(in_gate)) *
               self._activation(normalize(candidate)))
    new_c = kept + written

    new_h = self._activation(new_c) * tf.sigmoid(normalize(out_gate))
    return (new_c, new_h)
#
# Hyper parameters
#

batch_size = 16  # total batch size

#
# Inputs
#

# Corpus input tensors, sized for one batch per GPU.
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# MFCC features of the audio, one slice per GPU.
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# Target sentence labels, one slice per GPU.
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# Sequence lengths without zero padding: count the frames whose feature
# sum is non-zero.
seq_len = []
for input_ in inputs:
    nonzero_frames = tf.not_equal(input_.sg_sum(axis=2), 0.)
    seq_len.append(nonzero_frames.sg_int().sg_sum(axis=1))


# Parallel loss tower.
@tf.sg_parallel
def get_loss(opt):
    # Encode the audio features of this GPU's slice.
    # NOTE: the rest of this function lies outside the visible excerpt.
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
def generate():
    """Restore the latest checkpoint, decode sentences and log them.

    Builds the inference graph on the CPU, restores the trainable
    variables from ``<restore_dir>/last_chpt`` and then, ``num_epochs``
    times, encodes one batch and decodes three sentences from it. Each
    sample is printed and appended as a row to a timestamped CSV file
    inside ``restore_dir``.

    Differences from the previous revision:
      * The CSV file is managed by ``with`` so it is closed even when
        decoding raises.
      * Queue runners are started once, under a coordinator, and are
        stopped and joined on exit. Previously they were started twice and
        the second, coordinated call returned no threads, so the
        coordinator supervised nothing.
      * Locals that were assigned but never read were removed.
    """
    dev = '/cpu:0'
    with tf.device(dev):
        mydir = 'tfrc150char_wrd0704'
        tfrecords_filename = [join(mydir, 'short_infer3.tfrecords')]
        tfrecords_filename_inf = [join(mydir, '11_3.tfrecords')]
        print(tfrecords_filename)

        filename_queue = tf.train.string_input_producer(
            tfrecords_filename, num_epochs=num_epochs, shuffle=True,
            capacity=1)
        # NOTE(review): nothing below reads from this second producer. It
        # is kept so the graph still contains the same queue and epoch
        # counter; confirm it can be dropped.
        tf.train.string_input_producer(
            tfrecords_filename_inf, num_epochs=num_epochs, shuffle=True,
            capacity=1)

        with tf.variable_scope("dec_lstm"):
            dec_cell = BasicLSTMCell2(Hp.w_emb_size, Hp.rnn_hd,
                                      state_is_tuple=True)
        with tf.variable_scope("contx_lstm"):
            cell = BasicLSTMCell2(Hp.hd, Hp.rnn_hd, state_is_tuple=True)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                cell, input_keep_prob=Hp.keep_prob,
                output_keep_prob=Hp.keep_prob)

        (words, chars) = read_and_decode(filename_queue,
                                         Hp.batch_size * Hp.num_gpus)
        words_splits = tf.split(axis=0, num_or_size_splits=Hp.num_gpus,
                                value=words)
        chars_splits = tf.split(axis=0, num_or_size_splits=Hp.num_gpus,
                                value=chars)

        word_emb = np.loadtxt("glove300d_0704.txt")
        Hp.word_vs = word_emb.shape[0]

        # ------------------------------------------------------------------
        with tf.name_scope('%s_%d' % ("tower", 0)) as scope:
            rnn_state = tower_infer_enc(chars_splits[0], scope, rnn_cell,
                                        dec_cell, word_emb,
                                        out_reuse_vars=False, dev=dev)

            # The decoder is fed through placeholders so that every decoded
            # sentence can be pushed back in as the next input.
            chars_pl = tf.placeholder(tf.int32, shape=(None, Hp.c_maxlen))
            rnn_state_pl1 = [
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd)),
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd))
            ]
            rnn_state_pl = tf.contrib.rnn.LSTMStateTuple(
                rnn_state_pl1[0], rnn_state_pl1[1])

            final_ids, rnn_state_dec = tower_infer_dec(
                chars_pl, scope, rnn_cell, dec_cell, word_emb, rnn_state_pl,
                out_reuse_vars=False, dev=dev)
        # ------------------------------------------------------------------

        saver = tf.train.Saver(tf.trainable_variables())

        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.94
        session_config.gpu_options.allow_growth = False

        # Other runs: lec30d07m_1634g2 lec04d07m_2006g2 lec28d07m_1221g2
        # lec31d07m_1548g2
        restore_dir = 'tnsrbrd/hin17d08m_1313g2'
        csv_file = join(restore_dir, time.strftime("hin%dd%mm_%H%M.csv"))

        with open(csv_file, 'a') as csv_f, \
                tf.Session(config=session_config) as sess:
            csv_writer = csv.writer(csv_f)

            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            # Start the input threads exactly once, under a coordinator, so
            # they can be shut down cleanly in the ``finally`` below.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                saver.restore(
                    sess,
                    tf.train.latest_checkpoint(join(restore_dir,
                                                    'last_chpt')))

                for ep in range(num_epochs):
                    tf.sg_set_infer(sess)

                    rnn_state_val, w_txt, ch_txt = sess.run(
                        [rnn_state, words_splits[0], chars_splits[0]],
                        feed_dict={Hp.keep_prob: 1.0})

                    predictions = []
                    for idx in range(3):
                        # The first decoding step is seeded with sentence 2
                        # of the input; later steps consume the ids that
                        # were just generated.
                        if idx == 0:
                            char_inpt = ch_txt[:, 2, :]
                        else:
                            char_inpt = word2char_ids(ids_val)

                        ids_val, rnn_state_val = sess.run(
                            [final_ids, rnn_state_dec],
                            feed_dict={
                                Hp.keep_prob: 1.0,
                                rnn_state_pl1[0]: rnn_state_val[0],
                                rnn_state_pl1[1]: rnn_state_val[1],
                                chars_pl: char_inpt
                            })

                        # Blank out everything after the first id 2 in each
                        # row (presumably the end-of-sentence id; confirm).
                        for b in range(Hp.batch_size):
                            stop_ind = np.where(ids_val[b] == 2)[0]
                            if stop_ind.size > 0:
                                ids_val[b, stop_ind[0] + 1:] = 0

                        # Right-pad the decoded ids with zeros to w_maxlen.
                        temp = np.zeros((Hp.batch_size, Hp.w_maxlen))
                        temp[:, :ids_val.shape[1]] = ids_val
                        predictions.append(temp)

                    # predictions are decode_sent x b x w_maxlen
                    predictions = np.array(predictions)

                    in_batches = [
                        w_txt[b, :, :] for b in range(Hp.batch_size)
                    ]
                    res_batches = [
                        predictions[:, b, :] for b in range(Hp.batch_size)
                    ]

                    for b in range(Hp.batch_size):
                        in_paragraph = idxword2txt(in_batches[b])
                        print("\n INPUT SAMPLE \n")
                        print(in_paragraph)

                        res_paragraph = idxword2txt(res_batches[b])
                        print("\n RESULTS \n")
                        print(res_paragraph)

                        csv_writer.writerow([
                            " ".join(in_paragraph[:3]),
                            " ".join(in_paragraph[3:]),
                            " ".join(res_paragraph)
                        ])
            finally:
                # Stop the input threads while the session is still open.
                coord.request_stop()
                coord.join(threads)
def sg_quasi_rnn(tensor, opt):
    """Run quasi-RNN fo-pooling over ``tensor`` with named dense layers.

    ``tensor`` is split along axis 0 into (Z, F, O), or into (H, Z, F, O)
    when ``opt.att`` is set; H is then the memory used by the attention.
    The shape notes of the original code give each part as
    (b, seqlen, hd).

    Args:
        tensor: stacked gate tensor; the static size of axis 1 is the
            number of time steps.
        opt: options object. Besides the ``att`` and ``is_enc`` flags it
            supplies ``name``, ``dev`` and ``reuse_vars``, which are
            forwarded to every ``sg_dense_gpus`` layer.

    Returns:
        The hidden states of all steps concatenated along axis 1. With
        ``opt.is_enc`` set, three linear projections of the hidden state
        of the final loop step, tiled over time, are stacked after it
        along axis 0.
    """
    n_parts = 4 if opt.att else 3
    parts = tf.split(axis=0, num_or_size_splits=n_parts, value=tensor)
    memory = parts[0] if opt.att else None  # only read when opt.att is set
    z_all, f_all, o_all = parts[-3:]

    def linear(x, layer_name):
        """Linear dense layer placed and shared according to ``opt``."""
        return x.sg_dense_gpus(act="linear", name=layer_name, dev=opt.dev,
                               reuse=opt.reuse_vars)

    def pool(step_idx, z_t, f_t, o_t, cell):
        """Return (hidden, new cell memory) for time step ``step_idx``."""
        cell = f_t * cell + (1 - f_t) * z_t
        if not opt.att:
            return o_t * cell, cell
        # Attention weights over the memory and the resulting weighted sum
        # over its time axis, i.e. one vector per batch row.
        alpha = tf.nn.softmax(tf.einsum("ijk,ik->ij", memory, cell))
        context = (alpha.sg_expand_dims() * memory).sg_sum(axis=1)
        # The layer names carry the step index, so each step addresses its
        # own pair of layers.
        hidden = o_t * (linear(context, "k%d_%s" % (step_idx, opt.name)) +
                        linear(cell, "c%d_%s" % (step_idx, opt.name)))
        return hidden, cell

    timesteps = tensor.get_shape().as_list()[1]
    cell = 0
    per_step = []
    for t in range(timesteps):
        hidden, cell = pool(t, z_all[:, t, :], f_all[:, t, :],
                            o_all[:, t, :], cell)
        per_step.append(hidden.sg_expand_dims(axis=1))

    outputs = tf.concat(per_step, 1)
    if not opt.is_enc:
        return outputs

    # ``hidden`` still holds the state of the final loop iteration.
    tiled = []
    for template in ("z_%s", "f_%s", "o_%s"):
        projected = linear(hidden, template % opt.name)
        tiled.append(
            tf.tile(projected.sg_expand_dims(axis=1), [1, timesteps, 1]))

    return tf.concat(axis=0, values=[outputs] + tiled)
#
# Hyper parameters
#

batch_size = 16  # total batch size

#
# Inputs
#

# Corpus input tensors, sized for one batch per GPU.
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# MFCC features of the audio, one slice per GPU.
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# Target sentence labels, one slice per GPU.
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# Sequence lengths without zero padding: count the frames whose feature
# sum is non-zero.
seq_len = []
for input_ in inputs:
    nonzero_frames = tf.not_equal(input_.sg_sum(axis=2), 0.)
    seq_len.append(nonzero_frames.sg_int().sg_sum(axis=1))


# Parallel loss tower.
@tf.sg_parallel
def get_loss(opt):
    # Encode the audio features of this GPU's slice.
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)

    # CTC loss
    # NOTE: the rest of this function lies outside the visible excerpt.
import sugartensor as tf

__author__ = '*****@*****.**'

# Log at debug level.
tf.sg_verbosity(10)

# Batch size per GPU.
batch_size = 128

# MNIST input tensors; the batch is scaled by the number of GPUs.
data = tf.sg_data.Mnist(batch_size=batch_size * tf.sg_gpus())

# One slice of images and labels per GPU tower.
inputs = tf.split(data.train.image, tf.sg_gpus(), axis=0)
labels = tf.split(data.train.label, tf.sg_gpus(), axis=0)


# The decorator wraps the tower function for parallel training.
@tf.sg_parallel
def get_loss(opt):
    # Convolution stack: three conv + pool stages applied to this GPU's
    # slice of the input.
    with tf.sg_context(name='convs', act='relu', bn=True):
        stage = opt.input[opt.gpu_index]
        stage = stage.sg_conv(dim=16, name='conv1').sg_pool()
        stage = stage.sg_conv(dim=32, name='conv2').sg_pool()
        conv = stage.sg_conv(dim=32, name='conv3').sg_pool()

    # fc layers
    # NOTE: the rest of this function lies outside the visible excerpt.
#
# Hyper parameters
#

batch_size = 16  # total batch size

#
# Inputs
#

# Corpus input tensors, sized for one batch per GPU.
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# MFCC features of the audio, one slice per GPU.
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# Noisy MFCC features of the audio, one slice per GPU.
inputs_noise = tf.split(data.mfcc_noise, tf.sg_gpus(), axis=0)
# Target sentence labels, one slice per GPU.
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# Sequence lengths without zero padding: count the frames whose feature
# sum is non-zero.
seq_len = []
for input_ in inputs:
    nonzero_frames = tf.not_equal(input_.sg_sum(axis=2), 0.)
    seq_len.append(nonzero_frames.sg_int().sg_sum(axis=1))


def penalize_loss(gamma, lambd, tensor, tensor_n):
    """Penalty between a tensor and its noisy counterpart.

    Formula noted by the original author:
        gamma * (vector - vector_d)**2
        - lambd * (vector dot vector_d) / (norm(vector) * norm(vector_d))

    NOTE(review): the body of this function lies outside the visible
    excerpt; only the signature and the formula note are reproduced here.
    """