def get_batch_data(is_train=True, batch_size=16):
    '''
    Args:
      is_train: Boolean. If True, load training data. Otherwise, load validation data.

    Returns:
      A tuple of X batch queues (Tensor), Y batch queues (Tensor), and number of batches (int).
    '''
    # Load data
    X, Y = load_data(is_train=is_train)

    # Create input queues
    input_queues = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X), tf.convert_to_tensor(Y)])

    # Create batch queues
    X_batch, Y_batch = tf.train.shuffle_batch(input_queues,
                                              num_threads=8,
                                              batch_size=batch_size,
                                              capacity=batch_size * 64,
                                              min_after_dequeue=batch_size * 32,
                                              allow_smaller_final_batch=False)

    # Calc total batch count
    num_batch = len(X) // batch_size

    return X_batch, Y_batch, num_batch  # X_batch: (16, 9, 9, 1) int32, Y_batch: (16, 9, 9) int32
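# These queue-based loaders (here and throughout this section) only build graph
# ops; nothing is dequeued until queue runners are started. Below is a minimal
# consumption sketch under the TF 1.x queue-runner API, assuming the
# `get_batch_data` defined above; it is illustrative, not part of the original code.
import tensorflow as tf

X_batch, Y_batch, num_batch = get_batch_data(is_train=True, batch_size=16)

with tf.Session() as sess:
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(num_batch):
            x_np, y_np = sess.run([X_batch, Y_batch])  # (16, 9, 9, 1), (16, 9, 9)
            # ... run a training step on x_np, y_np here ...
    finally:
        coord.request_stop()
        coord.join(threads)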
def get_batch_data():
    '''Makes batch queues from the data.

    Returns:
      A tuple of x (Tensor), y (Tensor), and num_batch (int).
      x and y have the shape [batch_size, maxlen].
    '''
    # Load data
    X, Y = load_train_data()

    # Create input queues
    input_queues = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X, tf.int64), tf.convert_to_tensor(Y, tf.int64)])

    # Create batch queues
    x, y = tf.train.shuffle_batch(input_queues,
                                  num_threads=8,
                                  batch_size=Hyperparams.batch_size,
                                  capacity=Hyperparams.batch_size * 64,
                                  min_after_dequeue=Hyperparams.batch_size * 32,
                                  allow_smaller_final_batch=False)

    # Calc total batch count
    num_batch = len(X) // Hyperparams.batch_size

    return x, y, num_batch  # (64, 50) int64, (64, 50) int64, 1636
def get_batch_data():
    '''
    Returns:
      A tuple of X batch queues (Tensor), Y batch queues (Tensor), and number of batches (int).
    '''
    # Load data
    X, Y = load_train_data()
    char2idx, idx2char = load_vocab()

    # Make slices
    x_q, y_q = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X, tf.int32), tf.convert_to_tensor(Y, tf.int32)])

    # Create batch queues
    x, y = tf.train.shuffle_batch([x_q, y_q],
                                  num_threads=8,
                                  batch_size=Hp.bs,
                                  capacity=Hp.bs * 64,
                                  min_after_dequeue=Hp.bs * 32,
                                  allow_smaller_final_batch=False)

    # Get number of mini-batches
    num_batch = len(X) // Hp.bs

    return x, y, num_batch
def __init__(self, batch_size=16, set_name='train'):
    # load meta file
    label, mfcc_file = [], []
    with open(_data_path + 'preprocess/meta/%s.csv' % set_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        for row in reader:
            # mfcc file path
            mfcc_file.append(_data_path + 'preprocess/mfcc/' + row[0] + '.npy')
            # label info (stored as a byte string for variable-length support)
            label.append(np.asarray(row[1:], dtype=np.int).tostring())

    # to constant tensors
    label_t = tf.convert_to_tensor(label)
    mfcc_file_t = tf.convert_to_tensor(mfcc_file)

    # create queue from constant tensors
    label_q, mfcc_file_q = \
        tf.train.slice_input_producer([label_t, mfcc_file_t], shuffle=True)

    # create label, mfcc queue
    label_q, mfcc_q = _load_mfcc(source=[label_q, mfcc_file_q],
                                 dtypes=[tf.sg_intx, tf.sg_floatx],
                                 capacity=256, num_threads=64)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label_q, mfcc_q], batch_size,
                                 shapes=[(None,), (20, None)],
                                 num_threads=64, capacity=batch_size * 32,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(label) // batch_size
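# `_load_mfcc` is referenced above but not defined in this snippet. Based on how
# the queues are wired (a label byte string plus a path to a precomputed `.npy`
# MFCC file) and on the librosa-based variant shown further below, a plausible
# reconstruction using sugartensor's `tf.sg_producer_func` might look like this;
# the body is an assumption, not the original helper.
import numpy as np
import sugartensor as tf

@tf.sg_producer_func
def _load_mfcc(src_list):
    label, mfcc_file = src_list           # label bytes, path to a '.npy' file
    label = np.fromstring(label, np.int)  # decode the byte string back to ints
    mfcc = np.load(mfcc_file)             # precomputed MFCC, shape (20, time)
    return label, mfcc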
def get_batch_data(is_train=True):
    '''Returns batch data.

    Args:
      is_train: Boolean. If True, it returns batch training data.
        Otherwise, batch validation data.

    Returns:
      A tuple of x, y, and num_batch.
        x: A `Tensor` of float. Has the shape of (batch_size, 9, 9, 1).
        y: A `Tensor` of int. Has the shape of (batch_size, 9, 9).
        num_batch: A Python int. Number of batches.
    '''
    X, Y = load_data(is_train=is_train)

    # Create input queues
    input_queues = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X), tf.convert_to_tensor(Y)])

    # Create batch queues
    x, y = tf.train.shuffle_batch(input_queues,
                                  num_threads=8,
                                  batch_size=Hyperparams.batch_size,
                                  capacity=Hyperparams.batch_size * 64,
                                  min_after_dequeue=Hyperparams.batch_size * 32,
                                  allow_smaller_final_batch=False)

    # Calc total batch count
    num_batch = len(X) // Hyperparams.batch_size

    return x, y, num_batch  # (64, 9, 9, 1), (64, 9, 9), ()
def __init__(self, batch_size=32, name='train'):
    # load train corpus
    sources, targets = self._load_corpus(mode='train')

    # to constant tensors
    source = tf.convert_to_tensor(sources)
    target = tf.convert_to_tensor(targets)

    # create queue from constant tensors
    source, target = tf.train.slice_input_producer([source, target])

    # create batch queue
    batch_queue = tf.train.shuffle_batch([source, target], batch_size,
                                         num_threads=32,
                                         capacity=batch_size * 64,
                                         min_after_dequeue=batch_size * 32,
                                         name=name)

    # split data
    self.source, self.target = batch_queue

    # calc total batch count
    self.num_batch = len(sources) // batch_size

    # print info
    tf.sg_info('Train data loaded.(total data=%d, total batch=%d)'
               % (len(sources), self.num_batch))
def get_batch_data(mode='train'):
    '''Makes batch queues from the data.

    Args:
      mode: A string. Either 'train', 'val', or 'test'.

    Returns:
      A tuple of X_batch (Tensor), Y_batch (Tensor), and number of batches (int).
      X_batch and Y_batch have the shape [batch_size, maxlen].
    '''
    # Load data
    X, Y = load_data(mode)

    # Create input queues
    input_queues = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X, tf.int32), tf.convert_to_tensor(Y, tf.int32)])

    # Create batch queues
    X_batch, Y_batch = tf.train.shuffle_batch(input_queues,
                                              num_threads=8,
                                              batch_size=Hyperparams.batch_size,
                                              capacity=Hyperparams.batch_size * 64,
                                              min_after_dequeue=Hyperparams.batch_size * 32,
                                              allow_smaller_final_batch=False)

    # Calc total batch count
    num_batch = len(X) // Hyperparams.batch_size

    return X_batch, Y_batch, num_batch
def __init__(self, batch_size=16):
    print("# Make classes")
    import glob

    files = glob.glob(Hyperparams.image_fpath)
    labels = [f.split('/')[-1].split('-')[0] for f in files]  # e.g. ['scarf2', 'scarf1', ...]

    self.idx2label = {idx: label for idx, label in enumerate(set(labels))}
    self.label2idx = {label: idx for idx, label in self.idx2label.items()}
    labels = [self.label2idx[label] for label in labels]  # e.g. [3, 4, 6, ...]

    files = tf.convert_to_tensor(files)    # (4480,)
    labels = tf.convert_to_tensor(labels)  # (4480,)

    file_q, label_q = tf.train.slice_input_producer([files, labels], num_epochs=1)  # (), ()
    img_q = tf.image.decode_png(tf.read_file(file_q), channels=1)  # (576, 576, 1) uint8
    img_q = self.transform_image(img_q)  # (224, 224, 1) float32

    self.x, self.y = tf.train.shuffle_batch([img_q, label_q],
                                            batch_size,
                                            num_threads=32,
                                            capacity=batch_size * 128,
                                            min_after_dequeue=batch_size * 32,
                                            allow_smaller_final_batch=False)  # (16, 224, 224, 1), (16,)
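# `transform_image` is a method of this class that is not shown here. Given the
# shape comments ((576, 576, 1) uint8 in, (224, 224, 1) float32 out), a minimal
# sketch might look like the following; the resize method and the [0, 1]
# scaling are assumptions, not the original implementation.
def transform_image(self, img):
    img = tf.image.resize_images(img, [224, 224])  # bilinear resize -> float32
    img = img / 255.                               # assumed normalization to [0, 1]
    return img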
def get_batch_data():
    '''Makes batch queues from the data.'''
    # Load data
    X, Y = load_train_data()  # (196947, 1000) int64

    # Create input queues
    x_q, y_q = tf.train.slice_input_producer(
        [tf.convert_to_tensor(X, tf.int32), tf.convert_to_tensor(Y, tf.int32)])  # (1000,) int32
    x_q, y_q = q_process(x_q, y_q)  # (50,) int32, () int32

    # Create batch queues
    x, y = tf.train.shuffle_batch([x_q, y_q],
                                  num_threads=32,
                                  batch_size=Hyperparams.batch_size,
                                  capacity=Hyperparams.batch_size * 64,
                                  min_after_dequeue=Hyperparams.batch_size * 32,
                                  allow_smaller_final_batch=False)

    # Calc total batch count
    num_batch = len(X) // Hyperparams.batch_size

    return x, y, num_batch  # (64, 50) int32, (64,) int32, ()
def __init__(self, batch_size=16, data_path='asset/data/', vocabulary_loading=False):

    @tf.sg_producer_func
    def _load_mfcc(src_list):
        lab, wav = src_list  # label, wave file
        # decode string to integer
        lab = np.fromstring(lab, np.int)
        # load wave file
        wav, sr = librosa.load(wav, mono=True)
        # mfcc
        mfcc = librosa.feature.mfcc(wav, sr)
        # return result
        return lab, mfcc

    # path for loading just the vocabulary
    if vocabulary_loading:
        vocabulary_file = __vocabulary_save_dir__ + self.__class__.__name__ + '_vocabulary.npy'
        if os.path.exists(vocabulary_file):
            self.index2byte = np.load(vocabulary_file)
            self.byte2index = {}
            for i, b in enumerate(self.index2byte):
                self.byte2index[b] = i
            self.voca_size = len(self.index2byte)
            tf.sg_info('VCTK vocabulary loaded.')
            return

    # load corpus
    labels, wave_files = self._load_corpus(data_path)

    # to constant tensors
    label = tf.convert_to_tensor(labels)
    wave_file = tf.convert_to_tensor(wave_files)

    # create queue from constant tensors
    label, wave_file = tf.train.slice_input_producer([label, wave_file], shuffle=True)

    # decode wave file
    label, mfcc = _load_mfcc(source=[label, wave_file],
                             dtypes=[tf.sg_intx, tf.sg_floatx],
                             capacity=128, num_threads=32)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label, mfcc], batch_size,
                                 shapes=[(None,), (20, None)],
                                 num_threads=32, capacity=batch_size * 48,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(labels) // batch_size

    # print info
    tf.sg_info('VCTK corpus loaded.(total data=%d, total batch=%d)'
               % (len(labels), self.num_batch))
def __init__(self, batch_size=16, data_path='asset/data/', mode='train'):

    @tf.sg_producer_func
    def _load_mfcc(src_list):
        lab, wav = src_list  # label, wave file
        # decode string to integer
        lab = np.fromstring(lab, np.int)
        # load wave file
        wav, sr = librosa.load(wav, mono=True)
        # mfcc, truncated to the first 100 frames
        hl = 512
        mfcc = librosa.feature.mfcc(wav, sr, n_mfcc=40, hop_length=hl)
        mfcc = mfcc[:, :100]
        # return result
        return lab, mfcc

    print("Mode: %s" % mode)

    # load corpus; the accent labels are used as the targets
    labels, wave_files, accent_labels = self._load_corpus(data_path, mode == 'train')
    labels = np.array(accent_labels)
    self.labels = labels
    self.wave_files = wave_files

    # to constant tensors
    label = tf.convert_to_tensor(labels)
    wave_file = tf.convert_to_tensor(wave_files)

    # create queue from constant tensors
    label, wave_file = tf.train.slice_input_producer([label, wave_file], shuffle=True)

    # decode wave file
    label, mfcc = _load_mfcc(source=[label, wave_file],
                             dtypes=[tf.sg_intx, tf.sg_floatx],
                             capacity=128, num_threads=32)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label, mfcc], batch_size,
                                 shapes=[(None,), (40, None)],
                                 num_threads=32, capacity=batch_size * 48,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(labels) // batch_size

    # print info
    tf.sg_info('VCTK corpus loaded.(total data=%d, total batch=%d)'
               % (len(labels), self.num_batch))
def __init__(self, batch_size=16, set_name='train'):
    # load meta file
    label, mfcc_file, filenames = [], [], []
    with open(_data_path + 'preprocess/meta/%s.csv' % set_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        for row in reader:
            # mfcc file
            filenames.append(row[0])
            mfcc_file.append(_data_path + 'preprocess/mfcc/' + row[0] + '.npy')
            # alternative: _data_path + 'preprocess/mfcc-one/' + row[0] + '.npy'
            # label info (stored as a byte string for variable-length support)
            label.append(np.asarray(row[1:], dtype=np.int).tostring())

    # to constant tensors
    label_t = tf.convert_to_tensor(label)
    mfcc_file_t = tf.convert_to_tensor(mfcc_file)
    filenames_t = tf.convert_to_tensor(filenames)

    # create queue from constant tensors (note: shuffle is disabled here)
    label_q, mfcc_file_q, filenames_q = \
        tf.train.slice_input_producer([label_t, mfcc_file_t, filenames_t], shuffle=False)

    # create label, mfcc queue
    label_q, mfcc_q, filenames_q = _load_mfcc(
        source=[label_q, mfcc_file_q, filenames_q],
        dtypes=[tf.sg_intx, tf.sg_floatx, tf.string],
        capacity=256, num_threads=64)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label_q, mfcc_q, filenames_q], batch_size,
                                 shapes=[(None,), (20, None), ()],
                                 num_threads=64, capacity=batch_size * 32,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc, self.filenames = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(label) // batch_size

    # print info
    tf.sg_info('%s set loaded.(total data=%d, total batch=%d)'
               % (set_name.upper(), len(label), self.num_batch))
def __init__(self, batch_size=16, set_name='learning_batch.csv', data_path="MFCC/"):
    self._data_path = data_path

    # load meta file
    label, mfcc_file = [], []
    csv_file = pd.read_csv(set_name, header=None)
    for row in csv_file.iterrows():
        file_name, label_temp = row[1]
        label_temp = [int(t) for t in label_temp.split()]
        mfcc_file.append(self._data_path + file_name)
        # label info (stored as a byte string for variable-length support)
        label.append(np.asarray(label_temp, dtype=np.int).tostring())

    # to constant tensors
    label_t = tf.convert_to_tensor(label)
    mfcc_file_t = tf.convert_to_tensor(mfcc_file)

    # create queue from constant tensors
    label_q, mfcc_file_q = tf.train.slice_input_producer(
        [label_t, mfcc_file_t], shuffle=True)

    # create label, mfcc queue
    label_q, mfcc_q = _load_mfcc(source=[label_q, mfcc_file_q],
                                 dtypes=[tf.sg_intx, tf.sg_floatx],
                                 capacity=256, num_threads=64)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label_q, mfcc_q], batch_size,
                                 shapes=[(None,), (20, None)],
                                 num_threads=64, capacity=batch_size * 32,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(label) // batch_size

    # print info
    tf.sg_info('%s set loaded.(total data=%d, total batch=%d)'
               % (set_name.upper(), len(label), self.num_batch))
def train(self):
    predict = self.forward(Mnist.train.image)

    # GP-based data transfer experiment
    sess = tf.Session()
    with tf.sg_queue_context(sess):
        tf.sg_init(sess)
        trainf = sess.run([Mnist.train.image])[0]
        n, w, h, c = trainf.shape
        print(trainf.shape)
        np.savetxt('./image.txt', trainf[1, :, :, 0])
        # plt.imshow(trainf[1, :, :, 0])
        # plt.axis('off')
        # plt.show()

        # pair every image with a random candidate (GP transfer currently disabled)
        transfer = np.zeros((n, w, h, c))
        for i in range(n):
            candi = random.randint(0, n - 1)
            # transfer[i, :, :, :] = GP(trainf[i, :, :, :], trainf[candi, :, :, :])
        t = tf.convert_to_tensor(transfer, dtype=tf.float32)
        gp_predict = predict.sg_reuse(input=t)

    sess.close()
def test(tfname, weightPaths, steps=100000, Var=["NNReg"], lll=2000):
    tf.Graph()
    x, y = read_from_tfrecords(tfname, ["source", "target"], 10,
                               [[1070, 3], [1070, 3]])
    global_step = tf.Variable(1, trainable=False, name='global_step')
    print(x.shape, y.shape)

    # override the record tensors with fixed point sets loaded from text files
    x = np.loadtxt('EM.txt', dtype='float32') / 1500
    y = np.loadtxt('FM.txt', dtype='float32')[:, :100] / 1500
    x = tf.convert_to_tensor(np.expand_dims(np.rollaxis(x, axis=0), axis=0))
    y = tf.convert_to_tensor(np.expand_dims(np.rollaxis(y, axis=0), axis=0))
    print(x.shape, y.shape)

    yp = Net(x, x, y) + x

    # collect the variables to restore
    tmp_var_list = {}
    for j in Var:
        for i in tf.global_variables():
            if i.name.startswith(j):
                tmp_var_list[i.name[:-2]] = i
    saver = tf.train.Saver(tmp_var_list)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    path = weightPaths + "model.ckpt-{}".format(steps)

    Sour, Targ, Trans_S = [], [], []
    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        saver.restore(sess, path)
        for i in tqdm.tqdm(range(lll)):
            S, T, TS = sess.run([x, y, yp])
            Sour.append(S)
            Targ.append(T)
            Trans_S.append(TS)
        coord.request_stop()
        coord.join(threads)

    return Sour, Targ, Trans_S
def __init__(self, mode="train"): ''' Args: mode: A string. Either "train" , "val", or "test" ''' if mode == 'train': self.X_batch, self.Y_batch, self.num_batch = get_batch_data( 'train') else: self.X_batch = tf.placeholder( tf.int32, [Hyperparams.batch_size, Hyperparams.maxlen]) self.Y_batch = tf.placeholder( tf.int32, [Hyperparams.batch_size, Hyperparams.maxlen]) self.X_batch_rev = self.X_batch.sg_reverse_seq() # (8, 100) # make embedding matrix for input characters embed_mat = tf.convert_to_tensor(load_embed_lookup_table()) # embed table lookup X_batch_3d = self.X_batch.sg_lookup( emb=embed_mat).sg_float() # (8, 100, 200) X_batch_rev_3d = self.X_batch_rev.sg_lookup( emb=embed_mat).sg_float() # (8, 100, 200) # 1st biGRU layer gru_fw1 = X_batch_3d.sg_gru(dim=Hyperparams.hidden_dim, ln=True) # (8, 100, 200) gru_bw1 = X_batch_rev_3d.sg_gru(dim=Hyperparams.hidden_dim, ln=True) # (8, 100, 200) gru1 = gru_fw1.sg_concat(target=gru_bw1) # (8, 100, 400) # 2nd biGRU layer gru_fw2 = gru1.sg_gru(dim=Hyperparams.hidden_dim * 2, ln=True) # (8, 100, 400) gru_bw2 = gru1.sg_gru(dim=Hyperparams.hidden_dim * 2, ln=True) # (8, 100, 400) gru2 = gru_fw2.sg_concat(target=gru_bw2) # (16, 100, 800) # fc dense layer reshaped = gru2.sg_reshape(shape=[-1, gru2.get_shape().as_list()[-1]]) logits = reshaped.sg_dense(dim=3) # 1 for space 2 for non-space self.logits = logits.sg_reshape(shape=gru2.get_shape().as_list()[:-1] + [-1]) if mode == 'train': # cross entropy loss with logits ( for training set ) self.loss = self.logits.sg_ce(target=self.Y_batch, mask=True) # accuracy evaluation ( for validation set ) self.X_val_batch, self.Y_val_batch, self.num_batch = get_batch_data( 'val') self.acc = (self.logits.sg_reuse( input=self.X_val_batch).sg_accuracy(target=self.Y_val_batch, name='val'))
def test(self):
    sess = tf.Session()
    with tf.sg_queue_context(sess):
        tf.sg_init(sess)
        testf = sess.run([Mnist.test.image])[0]
        n, w, h, c = testf.shape

        # stack the batch into tall (n * w, h) arrays: clean, noisy, and rotated variants
        tmp0 = np.zeros((n * w, h))
        tmp02 = np.zeros((n * w, h))
        tmp05 = np.zeros((n * w, h))
        tmp08 = np.zeros((n * w, h))
        tmp90 = np.zeros((n * w, h))
        tmp_90 = np.zeros((n * w, h))
        for i in range(n):
            tmp0[i * w: (i + 1) * w, 0: h] = testf[i, :, :, 0]
            tmp02[i * w: (i + 1) * w, 0: h] = addnoisy(testf[i, :, :, 0], 0.2)
            tmp05[i * w: (i + 1) * w, 0: h] = addnoisy(testf[i, :, :, 0], 0.5)
            tmp08[i * w: (i + 1) * w, 0: h] = addnoisy(testf[i, :, :, 0], 0.8)
            tmp90[i * w: (i + 1) * w, 0: h] = rotate90(testf[i, :, :, 0])
            tmp_90[i * w: (i + 1) * w, 0: h] = rotate_90(testf[i, :, :, 0])

        np.savetxt('./image0.txt', tmp0)
        np.savetxt('./image02.txt', tmp02)
        np.savetxt('./image05.txt', tmp05)
        np.savetxt('./image08.txt', tmp08)
        np.savetxt('./image90.txt', tmp90)
        np.savetxt('./image_90.txt', tmp_90)

        testf_tensor = tf.convert_to_tensor(testf, dtype=tf.float32)
        predict = self.forward(testf_tensor)
        acc = (predict.sg_softmax()
               .sg_accuracy(target=Mnist.test.label, name='test'))

        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(save_dir))

        total_accuracy = 0
        for i in range(Mnist.test.num_batch):
            total_accuracy += np.sum(sess.run([acc])[0])
        print('Evaluation accuracy: {}'.format(
            float(total_accuracy) / (Mnist.test.num_batch * batch_size)))

    # close session
    sess.close()
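# `addnoisy`, `rotate90`, and `rotate_90` are not defined in this snippet.
# Plausible NumPy sketches are given below; the salt-and-pepper interpretation
# of `addnoisy` and the rotation directions are assumptions inferred from the
# names and call sites, not the original helpers.
import numpy as np

def addnoisy(img, ratio):
    noisy = img.copy()
    mask = np.random.rand(*img.shape) < ratio          # corrupt ~`ratio` of pixels
    noisy[mask] = np.random.randint(0, 2, mask.sum())  # salt (1) or pepper (0)
    return noisy

def rotate90(img):
    return np.rot90(img)        # rotate 90 degrees counter-clockwise

def rotate_90(img):
    return np.rot90(img, k=-1)  # rotate 90 degrees clockwise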
def __init__(self, batch_size=16, set_name='train'):
    # load meta file
    label, mfcc_file = [], []
    with open(_data_path + 'preprocess/meta/%s.csv' % set_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        for row in reader:
            # mfcc file
            mfcc_file.append(_data_path + 'preprocess/mfcc/' + row[0] + '.npy')
            # label info (stored as a byte string for variable-length support)
            label.append(np.asarray(row[1:], dtype=np.int).tostring())

    # to constant tensors
    label_t = tf.convert_to_tensor(label)
    mfcc_file_t = tf.convert_to_tensor(mfcc_file)

    # create queue from constant tensors
    label_q, mfcc_file_q = \
        tf.train.slice_input_producer([label_t, mfcc_file_t], shuffle=True)

    # create label, mfcc queue
    label_q, mfcc_q = _load_mfcc(source=[label_q, mfcc_file_q],
                                 dtypes=[tf.sg_intx, tf.sg_floatx],
                                 capacity=256, num_threads=64)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label_q, mfcc_q], batch_size,
                                 shapes=[(None,), (20, None)],
                                 num_threads=64, capacity=batch_size * 32,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(label) // batch_size

    # print info
    tf.sg_info('%s set loaded.(total data=%d, total batch=%d)'
               % (set_name.upper(), len(label), self.num_batch))
def __init__(self, batch_size=16, set_name='train'):
    # load meta file
    label, mfcc_file = [], []
    self.daniter_label = []
    count = 0
    with open(_data_path + 'preprocess/meta/%s.csv' % set_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        for row in reader:
            # DANITER: keep only a magic small subset of rows
            if count not in [0, 24, 25, 26, 27]:
                count += 1
                if count == max_count:
                    break
                continue
            # mfcc file
            mfcc_file.append(_data_path + 'preprocess/mfcc/' + row[0] + '.npy')
            # label info (stored as a byte string for variable-length support)
            label.append(np.asarray(row[1:], dtype=np.int).tostring())
            self.daniter_label.append(np.asarray(row[1:], dtype=np.int))

            # DANITER: debug print of the decoded label
            print("\n\n\n##### Label ######", count)
            print("".join([index2byte[letter]
                           for letter in np.asarray(row[1:], dtype=np.int)]))

            count += 1
            if count == max_count:
                break
    self.mfcc_file = mfcc_file

    # to constant tensors
    label_t = tf.convert_to_tensor(label)
    mfcc_file_t = tf.convert_to_tensor(mfcc_file)

    # create queue from constant tensors
    label_q, mfcc_file_q = \
        tf.train.slice_input_producer([label_t, mfcc_file_t], shuffle=True)

    # create label, mfcc queue
    label_q, mfcc_q = _load_mfcc(source=[label_q, mfcc_file_q],
                                 dtypes=[tf.sg_intx, tf.sg_floatx],
                                 capacity=256, num_threads=64)

    # create batch queue with dynamic pad
    batch_queue = tf.train.batch([label_q, mfcc_q], batch_size,
                                 shapes=[(None,), (20, None)],
                                 num_threads=64, capacity=batch_size * 32,
                                 dynamic_pad=True)

    # split data
    self.label, self.mfcc = batch_queue

    # batch * time * dim
    self.mfcc = self.mfcc.sg_transpose(perm=[0, 2, 1])

    # calc total batch count
    self.num_batch = len(label) // batch_size

    # print info
    tf.sg_info('%s set loaded.(total data=%d, total batch=%d)'
               % (set_name.upper(), len(label), self.num_batch))
def __init__(self, batch_size=8, name='train', path=join(DATA_DIR, 'out_data.data'), b2i={}):
    if name == "train":
        print("Loading corpus...")
        # load train corpus
        if not b2i:
            sources, targets = self._load_corpus(mode='train', path=path)
        else:
            sources, targets = load_corpus_dict(path, b2i, 0, 150)

        print("Converting source to tensors...")
        # to constant tensors
        source = tf.convert_to_tensor(sources)
        print("Converting target to tensors...")
        target = tf.convert_to_tensor(targets)

        # create queue from constant tensors
        source, target = tf.train.slice_input_producer([source, target])

        # create batch queue
        batch_queue = tf.train.shuffle_batch([source, target], batch_size,
                                             num_threads=4,
                                             capacity=batch_size * 8,
                                             min_after_dequeue=batch_size * 4,
                                             name=name)

        # split data
        self.source, self.target = batch_queue

        # calc total batch count
        self.num_batch = len(sources) // batch_size

        # print info
        tf.sg_info('Train data loaded.(total data=%d, total batch=%d)'
                   % (len(sources), self.num_batch))

    if name == "test":
        print("Loading test corpus...")
        # load test corpus; targets are dummy zeros
        sources = self._load_corpus(mode='test', path=path)
        targets = np.zeros_like(sources)

        print("Converting source to tensors...")
        # to constant tensors
        source = tf.convert_to_tensor(sources)
        print("Converting target to tensors...")
        target = tf.convert_to_tensor(targets)

        # create queue from constant tensors
        source, target = tf.train.slice_input_producer([source, target])

        # create batch queue
        batch_queue = tf.train.shuffle_batch([source, target], batch_size,
                                             num_threads=4,
                                             capacity=batch_size * 8,
                                             min_after_dequeue=batch_size * 4,
                                             name=name)

        # split data
        self.source, self.target = batch_queue

        # calc total batch count
        self.num_batch = len(sources) // batch_size

        # print info
        tf.sg_info('Test data loaded.(total data=%d, total batch=%d)'
                   % (len(sources), self.num_batch))