def __init__(self,
             in_dim,
             dim,
             forget_bias=1.0,
             activation=tf.tanh,
             ln=True,
             bias=True,
             dtype=tf.float32,
             dev='/cpu:0',
             batch_size=3):
    """Store the cell hyper-parameters and create its state and weights.

    Args:
        in_dim: Number of rows of the ``w_i2h`` weight.
        dim: Number of rows of ``w_h2h``; every weight has ``4 * dim`` columns.
        forget_bias: Stored on the instance as ``_forget_bias``.
        activation: Stored on the instance as ``_activation``.
        ln: Accepted but ignored. ``_ln`` is hard-wired to ``False`` below,
            so the ``ln_rnn`` variables are never created.
        bias: When equal to ``True`` the ``w_b`` variable is created.
        dtype: Stored as ``_dtype``. The variables themselves are created
            with ``tf.float32`` / ``tf.sg_floatx``, not with this value.
        dev: Device string under which states and variables are created.
        batch_size: Leading dimension of the zero-initialised state tensors.
    """
    self._in_dim, self._dim = in_dim, dim
    self._forget_bias = forget_bias
    self._activation = activation
    self._ln = False
    self._bias = bias
    self._dev = dev
    self._size = self._in_dim * self._dim
    self._initializer = tf.contrib.layers.xavier_initializer()
    self._dtype = dtype

    gate_cols = 4 * self._dim
    state_shape = (batch_size, self._dim)

    with tf.device(self._dev), tf.variable_scope("lstm"):
        # Zero state tensors (plain ops, not variables).
        self.rnn_state = tf.zeros(state_shape, dtype=tf.sg_floatx)
        self.rnn_h = tf.zeros(state_shape, dtype=tf.sg_floatx)

        # Variables are only registered in the scope here; nothing in this
        # method keeps a reference to them.
        for var_name, n_rows in (('w_i2h', self._in_dim),
                                 ('w_h2h', self._dim)):
            tf.get_variable(
                var_name, (n_rows, gate_cols),
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(),
                trainable=True)

        # Equality test (not truthiness) is kept on purpose.
        if self._bias == True:
            tf.get_variable(
                'w_b', (1, gate_cols),
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(),
                trainable=True)

        if self._ln:
            with tf.variable_scope("ln_rnn"):
                for var_name, init_value in (('beta', 0.0), ('gamma', 1.0)):
                    tf.get_variable(
                        var_name, self._dim,
                        dtype=tf.sg_floatx,
                        initializer=tf.constant_initializer(init_value),
                        trainable=True)
def __init__(self,
             seqlen,
             in_dim,
             dim,
             forget_bias=1.0,
             activation=tf.tanh,
             ln=True,
             bias=True,
             dtype=tf.float32,
             dev='/cpu:0',
             batch_size=3):
    """Store the cell hyper-parameters and create the ``clstm`` weights.

    Args:
        seqlen: First dimension of the ``w_ic`` / ``w_fc`` / ``w_oc`` weights.
        in_dim: Stored as ``_in_dim``; only used here to compute ``_size``.
        dim: Second dimension of the three weights.
        forget_bias: Stored on the instance as ``_forget_bias``.
        activation: Stored on the instance as ``_activation``.
        ln: Stored on the instance as ``_ln``.
        bias: Stored on the instance as ``_bias``.
        dtype: Stored as ``_dtype``; the weights are created as ``tf.float32``.
        dev: Device string under which the variables are created.
        batch_size: Forwarded to ``self.make_states``.
    """
    self._seqlen = seqlen
    self._in_dim, self._dim = in_dim, dim
    self._forget_bias = forget_bias
    self._activation = activation
    self._ln = ln
    self._bias = bias
    self._dev = dev
    self._size = int(self._in_dim * self._dim)
    self._initializer = tf.contrib.layers.xavier_initializer()
    self._dtype = dtype

    weight_shape = (self._seqlen, self._dim)
    with tf.device(self._dev), tf.variable_scope("clstm"):
        # Variables are only registered in the scope; no reference is kept.
        for var_name in ('w_ic', 'w_fc', 'w_oc'):
            tf.get_variable(
                var_name, weight_shape,
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer(),
                trainable=True)

        # NOTE(review): the original line structure was lost, so it is not
        # certain that make_states ran inside this device/variable scope --
        # confirm against version control.
        self.make_states(batch_size)
def wrapper(**kwargs):
    r"""Runs the wrapped function once per GPU and collects the results.

    Args:
      kwargs: keyword arguments, converted to a `tf.sg_opt`. Each call to the
        wrapped function receives them merged with a `gpu_index` entry.

    Returns:
      A list with one result per GPU, in GPU-index order.
    """
    base_opt = tf.sg_opt(kwargs)

    results = []
    for gpu_idx in range(sg_gpus()):
        # Pin the tower to its GPU, give its ops their own name scope and
        # reuse variables on every tower after the first.
        with tf.device('/gpu:%d' % gpu_idx), \
                tf.name_scope('gpu_%d' % gpu_idx), \
                sg_context(reuse=gpu_idx > 0):
            tower_opt = base_opt * tf.sg_opt(gpu_index=gpu_idx)
            results.append(func(tower_opt))

    return results
def generate():
    """Restore the latest checkpoint and decode sentences to stdout and CSV.

    Builds the inference graph (``tower_infer_enc`` for the context state,
    ``tower_infer_dec`` for one decoding step), restores the newest
    checkpoint under ``<restore_dir>/last_chpt`` and, for ``num_epochs``
    batches, decodes three sentences per sample. Each sample is printed and
    appended as one row to a timestamped CSV file in ``restore_dir``.

    Relies on module-level names: ``Hp``, ``num_epochs``, ``read_and_decode``,
    ``BasicLSTMCell2``, ``tower_infer_enc``, ``tower_infer_dec``,
    ``word2char_ids`` and ``idxword2txt``.

    Changes from the previous version:
      * queue runners are started once, under a coordinator that is always
        stopped and joined (they used to be started twice and never joined);
      * the CSV file is closed even when decoding raises;
      * locals that were never read were removed.

    NOTE(review): the original line structure was lost; the nesting of the
    ``with`` blocks below is reconstructed -- confirm against version control.
    """
    dev = '/cpu:0'
    mydir = 'tfrc150char_wrd0704'
    eos_id = 2          # ids after the first occurrence of this id are zeroed
    decode_steps = 3    # sentences decoded per sample

    with tf.device(dev):
        tfrecords_filename = [join(mydir, 'short_infer3.tfrecords')]
        tfrecords_filename_inf = [join(mydir, '11_3.tfrecords')]
        print(tfrecords_filename)

        filename_queue = tf.train.string_input_producer(tfrecords_filename,
                                                        num_epochs=num_epochs,
                                                        shuffle=True,
                                                        capacity=1)
        # Never read below; kept so the graph and its queue runners are
        # exactly what they were before.
        tf.train.string_input_producer(tfrecords_filename_inf,
                                       num_epochs=num_epochs,
                                       shuffle=True,
                                       capacity=1)

        with tf.variable_scope("dec_lstm"):
            dec_cell = BasicLSTMCell2(Hp.w_emb_size,
                                      Hp.rnn_hd,
                                      state_is_tuple=True)
        with tf.variable_scope("contx_lstm"):
            cell = BasicLSTMCell2(Hp.hd, Hp.rnn_hd, state_is_tuple=True)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                input_keep_prob=Hp.keep_prob,
                output_keep_prob=Hp.keep_prob)

        (words, chars) = read_and_decode(filename_queue,
                                         Hp.batch_size * Hp.num_gpus)
        words_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=words)
        chars_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=chars)

        word_emb = np.loadtxt("glove300d_0704.txt")
        Hp.word_vs = word_emb.shape[0]

        with tf.name_scope('%s_%d' % ("tower", 0)) as scope:
            # Context state produced from the input paragraph.
            rnn_state = tower_infer_enc(chars_splits[0],
                                        scope,
                                        rnn_cell,
                                        dec_cell,
                                        word_emb,
                                        out_reuse_vars=False,
                                        dev='/cpu:0')

            # One decoding step, fed through placeholders so the state can be
            # carried from one sess.run call to the next.
            chars_pl = tf.placeholder(tf.int32, shape=(None, Hp.c_maxlen))
            rnn_state_pl1 = [
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd)),
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd))
            ]
            rnn_state_pl = tf.contrib.rnn.LSTMStateTuple(
                rnn_state_pl1[0], rnn_state_pl1[1])
            final_ids, rnn_state_dec = tower_infer_dec(chars_pl,
                                                       scope,
                                                       rnn_cell,
                                                       dec_cell,
                                                       word_emb,
                                                       rnn_state_pl,
                                                       out_reuse_vars=False,
                                                       dev='/cpu:0')

        saver = tf.train.Saver(tf.trainable_variables())

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.94
    session_config.gpu_options.allow_growth = False

    restore_dir = 'tnsrbrd/hin17d08m_1313g2'
    csv_file = join(restore_dir, time.strftime("hin%dd%mm_%H%M.csv"))

    with open(csv_file, 'a') as csv_f, \
            tf.Session(config=session_config) as sess:
        csv_writer = csv.writer(csv_f)

        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        saver.restore(sess,
                      tf.train.latest_checkpoint(join(restore_dir,
                                                      'last_chpt')))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for ep in range(num_epochs):
                tf.sg_set_infer(sess)
                rnn_state_val, w_txt, ch_txt = sess.run(
                    [rnn_state, words_splits[0], chars_splits[0]],
                    feed_dict={Hp.keep_prob: 1.0})

                predictions = []
                for idx in range(decode_steps):
                    # The first step is seeded from the input sentence at
                    # index 2; later steps feed back the previous prediction.
                    if idx == 0:
                        char_inpt = ch_txt[:, 2, :]
                    else:
                        char_inpt = word2char_ids(ids_val)

                    ids_val, rnn_state_val = sess.run(
                        [final_ids, rnn_state_dec],
                        feed_dict={
                            Hp.keep_prob: 1.0,
                            rnn_state_pl1[0]: rnn_state_val[0],
                            rnn_state_pl1[1]: rnn_state_val[1],
                            chars_pl: char_inpt
                        })

                    # Zero everything after the first end-of-sentence id.
                    for b in range(Hp.batch_size):
                        stop_ind = np.where(ids_val[b] == eos_id)[0]
                        if stop_ind.size > 0:
                            ids_val[b, stop_ind[0] + 1:] = 0

                    temp = np.zeros((Hp.batch_size, Hp.w_maxlen))
                    temp[:, :ids_val.shape[1]] = ids_val
                    predictions.append(temp)

                # predictions are decode_sent x b x w_maxlen
                predictions = np.array(predictions)

                for b in range(Hp.batch_size):
                    in_paragraph = idxword2txt(w_txt[b, :, :])
                    print("\n INPUT SAMPLE \n")
                    print(in_paragraph)
                    res_paragraph = idxword2txt(predictions[:, b, :])
                    print("\n RESULTS \n")
                    print(res_paragraph)
                    csv_writer.writerow([
                        " ".join(in_paragraph[:3]),
                        " ".join(in_paragraph[3:]),
                        " ".join(res_paragraph)
                    ])
        finally:
            # Always release the input threads, even when decoding fails.
            coord.request_stop()
            coord.join(threads)
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):
    """Build one inference step: encode a sentence, then decode word ids.

    The character ids are reversed along axis 1, run through the encoder
    stack (residual blocks, quasi-RNN, max-pool, highway), and the result
    advances the context LSTM from ``rnn_state``. Word ids are then decoded
    by beam search; the greedy branch is unreachable while ``greedy`` is
    hard-coded to ``False``.

    Args:
        chars: Character-id tensor for one sentence per sample
            (presumably ``(batch, Hp.c_maxlen)`` int ids -- TODO confirm).
        scope: Unused in this function.
        rnn_cell: Context cell, called under the ``contx_lstm`` scope.
        dec_cell: Decoder cell, called under the ``dec_lstm`` scope.
        word_emb: Passed as ``emb=`` when looking up the ``emb_word`` table.
        rnn_state: Incoming context state; indexed as ``[0]`` and ``[1]``.
        out_reuse_vars: Reuse flag for the decoder scopes. The embedding and
            encoder scopes always use ``reuse=True``.
        dev: Device string forwarded to the layer helpers.

    Returns:
        ``(ids, rnn_state)``: ``final_ids[:, 0, :]`` from the beam search
        (presumably the best beam -- confirm) and the updated context state.

    NOTE(review): the original line structure was lost; the extent of each
    ``with`` block below is reconstructed and affects variable names --
    confirm against version control.
    """
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)
    print(chars)  # debug output of the input tensor
    ch = chars
    # Every row is reversed over its full length (Hp.c_maxlen).
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    # The encoder is always built with reuse on in this function.
    reuse_vars = reuse_vars_enc = True

    # -------------------------- BYTENET ENCODER --------------------------
    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc

    # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
    with tf.variable_scope('quazi'):
        #quasi cnn layer ZFO [batch * 3, seqlen, dim2 ]
        conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                        size=4,
                                        name="qconv_1",
                                        dev=dev,
                                        reuse_vars=reuse_vars)
        # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
        pool0 = conv.sg_quasi_rnn(is_enc=False,
                                  att=False,
                                  name="qrnn_1",
                                  reuse_vars=reuse_vars,
                                  dev=dev)
        # Keep only the last position along axis 1.
        qpool_last = pool0[:, -1, :]

    # -------------------------- MAXPOOL along time dimension --------------------------
    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # -------------------------- HIGHWAY --------------------------
    # Despite the name, the two encodings are summed, not concatenated.
    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # -------------------------- CONTEXT LSTM --------------------------
    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    # From here on the caller-supplied reuse flag applies (decoder scopes).
    reuse_vars = out_reuse_vars
    greedy = False  # hard-coded: the greedy branch below never runs
    if greedy:
        dec_state = rnn_state
        dec_out = []
        # Every sample starts decoding from id 1.
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)
            dec_out.append(d_out)
            # Project to the vocabulary and feed the arg-max id to the next step.
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()
        dec_out = tf.stack(dec_out, 1)
        # Same "out_conv" layer (reuse=True) applied to all steps at once.
        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state
    else:
        # ------------------ BEAM SEARCH --------------------
        # Tile the context state along a new axis 1, one copy per beam.
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            # Callback for beam_search: runs the decoder cell once per beam
            # on the last id of that beam and returns the stacked outputs of
            # "out_conv" together with the stacked new states.
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                # NOTE(review): "out_conv" is placed outside the 'dec_lstm'
                # scope to match the greedy branch above -- confirm.
                with tf.variable_scope('dec_lstm',
                                       reuse=ind > 0 or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)
                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        # NOTE(review): 3.5 is passed positionally; its meaning depends on
        # beam_search.beam_search's signature (not visible here) -- confirm.
        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)
        return final_ids[:, 0, :], rnn_state
def tower_infer_enc(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    out_reuse_vars=False,
                    dev='/cpu:0'):
    """Encode the leading sentences of each paragraph into a context state.

    ``chars`` is a rank-3 tensor whose axis 1 is iterated sentence by
    sentence inside a ``tf.while_loop``. With
    ``statm_steps = tf.shape(chars)[1] // 2`` the loop runs while
    ``time < statm_steps - 1``; each step runs the encoder stack on one
    sentence and advances ``rnn_cell``.

    Args:
        chars: Rank-3 character-id tensor; cast to int32 and transposed with
            ``perm=[1, 0, 2]`` so that axis 1 becomes the loop axis.
        scope: Unused in this function.
        rnn_cell: Context cell; provides ``zero_state`` and is called under
            the ``contx_lstm`` scope.
        dec_cell: Unused in this function.
        word_emb: Passed as ``emb=`` when creating the ``emb_word`` table.
        out_reuse_vars: Reuse flag applied to every variable scope here.
        dev: Device string forwarded to the layer helpers.

    Returns:
        The context state after the last loop iteration.

    NOTE(review): the original line structure was lost; the extent of each
    ``with`` block below is reconstructed and affects variable names --
    confirm against version control.
    """
    out_rvars = out_reuse_vars  # never read below
    # make embedding matrix for source and target
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=out_reuse_vars):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            # emb_word is created here but not used in this function.
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)
    chars = tf.cast(chars, tf.int32)
    # Loop counter; this local name hides any module-level `time` here.
    time = tf.constant(0)
    inputs = tf.transpose(chars, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32,
                                            size=tf.shape(chars)[1],
                                            dynamic_size=True,
                                            clear_after_read=True)
    chars_sent = input_ta.unstack(inputs)  #each element is (batch, sentlen)
    resp_steps = tf.shape(chars)[1]  # number of sentences in paragraph
    statm_steps = resp_steps // 2
    rnn_state = rnn_cell.zero_state(
        Hp.batch_size, tf.float32)  #rnn_cell.rnn_state, rnn_cell.rnn_h
    maxdecode = 3  # never read below

    # -------------------------------------------- STATEMENT ENCODING -----------------------------------------------
    def rnn_cond_stat(time, rnn_state):
        # Continue while time < statm_steps - 1.
        return tf.less(time, statm_steps - 1)

    def rnn_body_stat(time, rnn_state):
        # Encode sentence number `time` and advance the context state.
        ch = chars_sent.read(time)
        # Every row is reversed over its full length (Hp.c_maxlen).
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = out_reuse_vars

        # -------------------------- BYTENET ENCODER --------------------------
        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5,
                                        rate=1,
                                        name="enc1_%d" % (i),
                                        is_first=True,
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=2,
                                            name="enc2_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=4,
                                                name="enc4_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=8,
                                                    name="enc8_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev).sg_res_block(
                                                        size=5,
                                                        rate=16,
                                                        name="enc16_%d" % (i),
                                                        reuse_vars=reuse_vars,
                                                        dev=dev))
            byte_enc = enc

        # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
        with tf.variable_scope('quazi'):
            #quasi cnn layer ZFO [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)
            # Keep only the last position along axis 1.
            qpool_last = pool0[:, -1, :]

        # -------------------------- MAXPOOL along time dimension --------------------------
        inpt_maxpl = tf.expand_dims(byte_enc,
                                    1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # -------------------------- HIGHWAY --------------------------
        # Despite the name, the two encodings are summed, not concatenated.
        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # -------------------------- CONTEXT LSTM --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)
        return (time + 1, rnn_state)

    loop_vars_stat = [time, rnn_state]
    time, rnn_state = tf.while_loop\
        (rnn_cond_stat, rnn_body_stat, loop_vars_stat, swap_memory=False)
    return rnn_state
def _sg_select_by_category(items, category):
    """Return the items whose ``name`` starts with ``category``.

    ``category`` is either one prefix string or a tuple/list of prefixes. For
    a sequence, matches are concatenated prefix by prefix, in the order the
    prefixes are given.
    """
    if isinstance(category, (tuple, list)):
        selected = []
        for cat in category:
            selected.extend([t for t in items if t.name.startswith(cat)])
        return selected
    return [t for t in items if t.name.startswith(category)]


def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
        loss: A 0-D `Tensor` containing the value to minimize. list of 0-D tensor for Multiple GPU
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          momentum : A Python Scalar for RMSProp optimizer (optional). Default is 0.
          category: A string or string list. Specifies the variables that should be trained (optional).
            Only if the name of a trainable variable starts with `category`, it's value is updated.
            Default is '', which means all trainable variables are updated.
          clip_grad_norm: A Python Scalar (optional). When given, the final
            gradients are clipped to this global norm before they are
            applied. When omitted, no clipping is done.

    Returns:
        A single op grouping the gradient update with the `UPDATE_OPS`
        collection entries whose names match `category`.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, momentum=0., category='')

    # select optimizer (anything unrecognised falls back to plain SGD)
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'RMSProp':
        optim = tf.train.RMSPropOptimizer(learning_rate=opt.lr, decay=opt.beta1, momentum=opt.momentum)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    var_list = _sg_select_by_category(tf.trainable_variables(), opt.category)

    #
    # calc gradient
    #

    # multiple GPUs case
    if isinstance(loss, (tuple, list)):
        gradients = []
        # loop for each GPU tower
        for i, loss_ in enumerate(loss):
            # specify device, and give a new scope only to operations
            with tf.device('/gpu:%d' % i), tf.name_scope('gpu_%d' % i):
                # add gradient calculation operation for each GPU tower
                gradients.append(tf.gradients(loss_, var_list))

        # averaging gradient
        gradient = [tf.add_n(grad) / len(loss) for grad in zip(*gradients)]
    # single GPU case
    else:
        gradient = tf.gradients(loss, var_list)

    # Clip only when a norm was supplied: `clip_grad_norm` has no default, so
    # an unconditional call would hand None to tf.clip_by_global_norm.
    clip_norm = getattr(opt, 'clip_grad_norm', None)
    if clip_norm is not None:
        gradient, _ = tf.clip_by_global_norm(gradient, clip_norm)

    # gradient update op
    with tf.device('/gpu:0'):
        grad_var = list(zip(gradient, var_list))
        grad_op = optim.apply_gradients(grad_var, global_step=tf.sg_global_step())

    # add summary using last tower value
    skipped_tags = ('mean', 'variance', 'beta', 'gamma')
    for g, v in grad_var:
        # exclude batch normal statics
        if not any(tag in v.name for tag in skipped_tags):
            tf.sg_summary_gradient(v, g)

    # extra update ops within category ( for example, batch normal running stat update )
    update_op = _sg_select_by_category(
        tf.get_collection(tf.GraphKeys.UPDATE_OPS), opt.category)

    return tf.group(*([grad_op] + update_op))
def train_loop():
    """Train for ``num_epochs`` epochs, evaluating and saving after each one.

    Per epoch: draws ``num_batches_per_epoch`` batches without replacement
    from ``range(datasetsize)``, runs the optimizer on each, writes a train
    summary every 16th batch, then evaluates one random test batch, prints
    the decoded sequences, writes a test summary and saves the model to
    ``savepath + '/model'``.

    Relies on module-level names: ``graph``, ``config``, ``logs_path``,
    ``savepath``, ``num_epochs``, ``num_batches_per_epoch``, ``datasetsize``,
    ``testsetsize``, ``batchsize``, ``testbatchsize``, ``num_examples``,
    ``dr``, ``t_dr``, the graph tensors ``inputs`` / ``targets`` /
    ``seq_len`` / ``cost`` / ``train_optimizer`` / ``ler`` / ``decoded`` /
    ``merged`` and the helpers ``next_miniBatch``, ``next_target_miniBatch``,
    ``pad_sequences``, ``sparse_tuple_from``, ``decode_to_chars``.

    Changes from the previous version:
      * the decoded sparse result is densified with NumPy; it used to build a
        new ``tf.sparse_tensor_to_dense`` op every epoch, so the graph grew
        for the whole run;
      * already-drawn indices are filtered through a set instead of a list;
      * both summary writers are closed when training ends or fails;
      * the summary step is tracked explicitly instead of reading the batch
        loop variable after the loop.

    NOTE(review): the original line structure was lost; saving once per epoch
    (rather than once after the last epoch) is reconstructed -- confirm.
    """
    import numpy as np  # local import keeps this block self-contained

    with tf.device("/cpu:0"):
        # Launch the graph
        with tf.Session(graph=graph, config=config) as sess:
            print("Starting Tensorboard...")
            initstart = time.time()
            train_writer = tf.summary.FileWriter(logs_path + '/TRAIN',
                                                 graph=sess.graph)
            test_writer = tf.summary.FileWriter(logs_path + '/TEST',
                                                graph=sess.graph)
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                                        output_partition_graphs=True)
            tf.global_variables_initializer().run()
            saver = tf.train.Saver()

            try:
                for curr_epoch in range(num_epochs):
                    print('>>>', time.strftime('[%H:%M:%S]'), 'Epoch',
                          curr_epoch + 1, '/', num_epochs)
                    train_cost = train_ler = 0
                    start = t_time = time.time()
                    epoch_base = curr_epoch * num_batches_per_epoch
                    step = epoch_base

                    index_list = list(range(0, datasetsize))
                    for batch in range(num_batches_per_epoch):
                        step = batch + epoch_base

                        # Draw a batch and remove it from the pool so that no
                        # example is used twice within one epoch.
                        indexes = random.sample(index_list, batchsize)
                        drawn = set(indexes)
                        index_list = [x for x in index_list if x not in drawn]

                        train_inputs = next_miniBatch(indexes, dr[0])
                        train_targets = next_target_miniBatch(indexes, dr[1])

                        newindex = [i % num_examples for i in range(batchsize)]
                        random.shuffle(newindex)
                        batch_train_inputs = train_inputs[newindex]

                        # Padding input to max_time_step of this batch
                        batch_train_inputs, batch_train_seq_len = pad_sequences(
                            batch_train_inputs)

                        # Converting to sparse representation so as to feed
                        # SparseTensor input
                        batch_train_targets = sparse_tuple_from(
                            train_targets[newindex])

                        feed = {
                            inputs: batch_train_inputs,
                            targets: batch_train_targets,
                            seq_len: batch_train_seq_len
                        }
                        batch_cost, _, l = sess.run(
                            [cost, train_optimizer, ler],
                            feed,
                            options=run_options)
                        train_cost += batch_cost * batchsize
                        train_ler += l * batchsize

                        print('[' + str(curr_epoch) + ']', ' >>>',
                              time.strftime('[%H:%M:%S]'), 'Batch', batch + 1,
                              '/', num_batches_per_epoch, '@Cost', batch_cost,
                              'Time Elapsed',
                              time.time() - t_time, 's')
                        t_time = time.time()

                        if batch % 16 == 0:
                            summary = sess.run(merged,
                                               feed_dict=feed,
                                               options=run_options)
                            train_writer.add_summary(summary, int(step))
                            train_writer.flush()

                    # Metrics mean
                    train_cost /= num_examples
                    train_ler /= num_examples

                    # Testing
                    print('>>>', time.strftime('[%H:%M:%S]'),
                          'Evaluating Test Accuracy...')
                    t_index = random.sample(range(0, testsetsize),
                                            testbatchsize)
                    test_inputs = next_miniBatch(t_index, t_dr[0], test=True)
                    test_targets = next_target_miniBatch(t_index, t_dr[1])
                    newindex = [
                        i % testbatchsize for i in range(testbatchsize)
                    ]
                    batch_test_inputs = test_inputs[newindex]
                    batch_test_inputs, batch_test_seq_len = pad_sequences(
                        batch_test_inputs, test=True)
                    batch_test_targets = sparse_tuple_from(
                        test_targets[newindex])
                    t_feed = {
                        inputs: batch_test_inputs,
                        targets: batch_test_targets,
                        seq_len: batch_test_seq_len
                    }
                    test_ler, d = sess.run((ler, decoded[0]),
                                           feed_dict=t_feed,
                                           options=run_options)

                    # Densify the (indices, values, shape) triple in NumPy,
                    # padding with -1, without touching the graph.
                    sp_indices, sp_values, sp_shape = d
                    sp_values = np.asarray(sp_values)
                    dense_decoded = np.full(tuple(sp_shape), -1,
                                            dtype=sp_values.dtype)
                    dense_decoded[tuple(np.asarray(sp_indices).T)] = sp_values

                    for i, seq in enumerate(dense_decoded):
                        seq = [s for s in seq if s != -1]
                        tmp_o = decode_to_chars(test_targets[i])
                        tmp_d = decode_to_chars(seq)
                        print('Sequence %d' % i)
                        print('\t Original:\n%s' % tmp_o)
                        print('\t Decoded:\n%s' % tmp_d)
                    print('Done!')

                    log = "Epoch {}/{} | Batch Cost : {:.3f} | Train Accuracy : {:.3f}% | Test Accuracy : {:.3f}% | Time Elapsed : {:.3f}s"
                    print(
                        log.format(curr_epoch + 1, num_epochs, train_cost,
                                   100 - (train_ler * 100),
                                   100 - (test_ler * 100),
                                   time.time() - start))

                    t_summary = sess.run(merged,
                                         feed_dict=t_feed,
                                         options=run_options)
                    test_writer.add_summary(t_summary, int(step))
                    test_writer.flush()

                    save_path = saver.save(sess, savepath + '/model')
                    print(">>> Model saved succesfully")
            finally:
                # Release the event files even if training is interrupted.
                train_writer.close()
                test_writer.close()

            print('Total Training Time: ' + str(time.time() - initstart) + 's')
print('[OK] sys ') import random print('[OK] random ') import numpy as np print('[OK] numpy ') import string import glob print('[OK] glob ') import os print('[OK] os ') os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ["CUDA_VISIBLE_DEVICES"] = '0' config = tf.ConfigProto() with tf.device("/cpu:0"): # Network Params # num_mfccs = 13 num_classes = 28 num_hidden = 512 learning_rate = 1e-3 momentum = 0.9 decay = 0.9 num_layers = 2 input_noise = True noise_magnitude = 0.01 dataset = 'LibriSpeech' #[TIMIT / LibriSpeech] ############## #PARAMS