def load_data(train_data_path, valid_data_path, test_data_path, seq_length):
  data_loader = DataLoader(train_data_path, valid_data_path, test_data_path,
                           seq_length=seq_length)
  data_loader.format()
  return data_loader.train_len, data_loader.train_data, data_loader.valid_len, \
      data_loader.valid_data, data_loader.test_len, data_loader.test_data
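# Usage sketch for load_data(); the "./data/*" paths and seq_length=512 mirror
# the TestLoad.setUp call further below and are placeholders for the real layout.
if __name__ == '__main__':
  (train_len, train_data, valid_len, valid_data,
   test_len, test_data) = load_data("./data/train", "./data/valid",
                                    "./data/test", seq_length=512)
  print('train/valid/test sizes:', train_len, valid_len, test_len)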
class EvalCallback(Callback):

    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(get_eval_input_names(),
                                               get_eval_output_names())
        self.data_loader = DataLoader(audio_meta, hp.eval.batch_size).dataflow()

    def _trigger_epoch(self):
        _, mel_spec, speaker_id = next(self.data_loader.get_data())
        acc, = self.pred(mel_spec, speaker_id)
        self.trainer.monitors.put_scalar('eval/accuracy', acc)
""" Load a subset of data required by configure file (cfg_target) """ import cfg_target from data_load import DataLoader data = DataLoader(cfg_target) target = data.data_download_target() covariates_us, covariates_sea, covariates_global, spatial_covariates, temporal_covariates = data.data_download_cov()
parser.add_argument('-ckpt', help='checkpoint to load model.')
parser.add_argument('-gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('-r', action='store_true', help='start training from the beginning.')
parser.add_argument('-remote', action='store_true', help='use remote dataflow.')
parser.add_argument('-port', type=int, default=0)
args = parser.parse_args()

# set hyper-parameters from yaml file
hp.set_hparam_yaml(case=args.case)

# dataflow
audio_meta = AudioMeta(hp.train.data_path)
if args.remote:
    df = get_remote_dataflow(args.port, hp.train.batch_size)
else:
    df = DataLoader(audio_meta, hp.train.batch_size).dataflow(
        nr_prefetch=5000,
        nr_thread=int(multiprocessing.cpu_count() // 1.5))

# set logger for event and model saver
logger.set_logger_dir(hp.logdir)

train_conf = TrainConfig(
    model=ClassificationModel(num_classes=audio_meta.num_speaker, **hp.model),
    data=FlexibleQueueInput(df, capacity=500),
    callbacks=[
        ModelSaver(checkpoint_dir=hp.logdir),
        EvalCallback()
    ],
    steps_per_epoch=hp.train.steps_per_epoch,
    # session_config=session_config
)
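# A hedged follow-up sketch: tensorpack configs are normally started with
# launch_train_with_config. The SimpleTrainer choice is an assumption here,
# since the original snippet ends right after building TrainConfig.
from tensorpack import launch_train_with_config, SimpleTrainer

launch_train_with_config(train_conf, SimpleTrainer())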
parser.add_argument('--y', action='store_true')
parser.add_argument('--evind', action='store_true')
parser.add_argument('--all', action='store_true')
parser.add_argument('vars', nargs=argparse.REMAINDER)
args = parser.parse_args()

base_dir = 'data/'
if len(args.vars) == 0:
    print('no base dir provided, using ./data as default')
else:
    base_dir = args.vars[0]

du = DataLoader()
du.load_data(sub_sample=False)

if args.brand or args.all:
    gen_vectorized_data(du, ['brand_code'], 'brand', base_dir)
if args.model or args.all:
    gen_vectorized_data(du, ['model_code'], 'model', base_dir)
if args.label or args.all:
    gen_vectorized_data(du, ['label_id_bag'], 'label', base_dir)
if args.app or args.all:
    gen_vectorized_data(du, ['app_id_bag'], 'appid', base_dir)
if args.term or args.all:
    gen_term_data(du, base_dir)
if args.y or args.all:
    gen_y(du, base_dir)
if args.evind or args.all:
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1,
                        help='gpu id to use (-1 for CPU)')
    parser.add_argument('--max_iter', type=int, default=10,
                        help='number of iterations')
    parser.add_argument('--decay_epoch', type=int, default=20,
                        help='number of epochs between learning rate decays')
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True,
                        help='enable testing during training')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1,
                        help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01,
                        help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75,
                        help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1E-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--max_grad_norm', type=float, default=3.0,
                        help='maximum gradient norm')
    parser.add_argument('--hidden_dim', type=int, default=128,
                        help='hidden layer dimension')
    parser.add_argument('--n_hidden', type=int, default=2,
                        help='number of hidden layers')

    dataset = 'assist2009_updated'

    if dataset == 'oj':
        parser.add_argument('--batch_size', type=int, default=5, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=100,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=68,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/oj',
                            help='data directory')
        parser.add_argument('--data_name', type=str, default='oj', help='data set name')
        parser.add_argument('--load', type=str, default='oj', help='model file to load')
        parser.add_argument('--save', type=str, default='oj', help='path to save model')
    elif dataset == 'assistments':
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=124,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/assistments',
                            help='data directory')
        parser.add_argument('--data_name', type=str, default='assistments',
                            help='data set name')
        parser.add_argument('--load', type=str, default='assistments',
                            help='model file to load')
        parser.add_argument('--save', type=str, default='assistments',
                            help='path to save model')
    elif dataset == 'assist2009_updated':
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=110,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str,
                            default='../../dataset/assist2009_updated',
                            help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2009_updated',
                            help='data set name')
        parser.add_argument('--load', type=str, default='assist2009_updated',
                            help='model file to load')
        parser.add_argument('--save', type=str, default='assist2009_updated',
                            help='path to save model')
    elif dataset == 'STATICS':
        parser.add_argument('--batch_size', type=int, default=10, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=100,
                            help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=1223,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=800,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/STATICS',
                            help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS',
                            help='data set name')
        parser.add_argument('--load', type=str, default='STATICS',
                            help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS',
                            help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr
    print(params)

    dat = DataLoader(',', params.seqlen, 1, 0)
    # dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')

    # train_data_path = params.data_dir + "/" + "builder_train.csv"
    # valid_data_path = params.data_dir + "/" + "builder_test.csv"
    train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
    # test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"

    max_length, min_length, max_q_id = dat.scan_file(train_data_path)

    train_q_data, train_q_t_data, train_answer_data = dat.prepare_model_data(
        train_data_path, max_q_id)
    train_q_data = np.array(train_q_data)
    print(train_q_data.shape)
    train_q_t_data = np.array(train_q_t_data)
    train_answer_data = np.array(train_answer_data)

    valid_q_data, valid_q_t_data, valid_answer_data = dat.prepare_model_data(
        valid_data_path, max_q_id)
    valid_q_data = np.array(valid_q_data)
    valid_q_t_data = np.array(valid_q_t_data)
    valid_answer_data = np.array(valid_answer_data)

    # train_q_data, train_q_t_data, train_answer_data = dat.load_data(train_data_path)
    # valid_q_data, valid_q_t_data, valid_answer_data = dat.load_data(valid_data_path)
    # test_q_data, test_q_t_data, test_answer_data = dat.load_data(test_data_path)

    model = MODEL(n_question=params.n_question,
                  hidden_dim=params.hidden_dim,
                  x_embed_dim=params.qa_embed_dim,
                  hidden_layers=params.n_hidden,
                  gpu=params.gpu)
    model.init_embeddings()
    model.init_params()
    # model = torch.load(params.data_dir + "/save/" + params.save)

    # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum)
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(
            model, idx, params, optimizer,
            train_q_data, train_q_t_data, train_answer_data)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' %
              (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(
            model, params, optimizer,
            valid_q_data, valid_q_t_data, valid_answer_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' %
              (idx + 1, params.max_iter, valid_auc, valid_accuracy))

        # test_loss, test_accuracy, test_auc = test(model, params, optimizer,
        #                                           test_q_data, test_q_t_data,
        #                                           test_answer_data)
        # print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
        #     idx + 1, params.max_iter, test_auc, test_accuracy))

        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_auc[idx + 1] = valid_auc

        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
        post_optimizer = self.build_optimizer(post_loss, "post_optimizer")

        inputs = {"initial_state": initial_state,
                  "encoder_inputs": encode_inputs,
                  "encoder_inputs_len": encode_inputs_len,
                  "pre_decoder_inputs": pre_decode_inputs,
                  "pre_decoder_inputs_len": pre_decode_inputs_len,
                  "pre_decoder_targets": pre_decode_targets,
                  "post_decoder_inputs": post_decode_inputs,
                  "post_decoder_inputs_len": post_decode_inputs_len,
                  "post_decoder_targets": post_decode_targets}
        pre_decoder = {"pre_optimizer": pre_optimizer,
                       "pre_loss": pre_loss,
                       "pre_state": pre_final_state}
        post_decoder = {"post_optimizer": post_optimizer,
                        "post_loss": post_loss,
                        "post_state": post_final_state}
        return inputs, pre_decoder, post_decoder


vocab_size = 50000
epoch_num = 1000000
batch_size = 128
data_loader = DataLoader("doupocangqiong.txt", data_file_format="utf8",
                         vocabulary_size=vocab_size,
                         stop_word_file="stop_words.txt", use_jieba=True)
# data_loader = DataLoader("abc_news.txt", data_file_format="ascii", vocabulary_size=vocab_size)
data_loader.load()


def main():
    sen2vec = SkipThroughSen2vec(vocab_size=vocab_size, embedding_dim=64,
                                 num_units=64, batch_size=batch_size,
                                 learning_rate=1.0)
    inputs, pre_decoder, post_decoder = sen2vec.build()
    sess = tf.Session()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    new_state = sess.run(inputs['initial_state'])
    step = 0
    while step < epoch_num:
        (cur_inputs, cur_inputs_len, pre_inputs, pre_inputs_len, pre_targets,
         post_inputs, post_inputs_len, post_targets) = \
            data_loader.generate_skip_through_batch(batch_size)
        feed = {inputs["initial_state"]: new_state,
import os

from pca2d import PCA2D
from cnn_svm import CNN_SVM
from data_load import DataLoader, Dataset
import numpy as np
import cv2

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from sklearn.metrics import confusion_matrix
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sn

sn.set(style='white', context='notebook', palette='deep')

cur_dataset = Dataset.TU
# cur_dataset = Dataset.YALE_EX_CROPPED  # Yale Cropped
dataloader = DataLoader(cur_dataset, {'angle_limit': 10, 'img_format': None})
X, y, z, v = dataloader.load_data(reload=False)
# X = X / 255

plt.figure(1)
g = sn.countplot(y)

# z is the number of classes
# v is the map of labels to which 0-num_classes correspond
num_val_splits = 10
labels = list(range(0, z))

# CNN + SVM 10-fold cross-validation
X_shape = (X.shape[1], X.shape[2], X.shape[3])
# X_shape = (X.shape[0], X.shape[1], X.shape[2])  # Yale Cropped
parser.add_argument('-ckpt', help='checkpoint to load model.')
args = parser.parse_args()

hp.set_hparam_yaml(args.case)

# model
audio_meta_train = VoxCelebMeta(hp.train.data_path, hp.train.meta_path)
model = ClassificationModel(num_classes=audio_meta_train.num_speaker, **hp.model)

# data loader
audio_meta_class = globals()[hp.embed.audio_meta_class]
params = {'data_path': hp.embed.data_path}
if hp.embed.meta_path:
    params['meta_path'] = hp.embed.meta_path
audio_meta = audio_meta_class(**params)
data_loader = DataLoader(audio_meta, hp.embed.batch_size)

# samples
wav, mel_spec, speaker_id = next(data_loader.dataflow().get_data())

ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)
pred_conf = PredictConfig(
    model=model,
    input_names=['x'],
    output_names=['embedding/embedding', 'prediction'],
    session_init=SaverRestore(ckpt) if ckpt else None)
embedding_pred = OfflinePredictor(pred_conf)

embedding, pred_speaker_id = embedding_pred(mel_spec)
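# Hedged follow-up sketch: one common use of the extracted embeddings is a
# cosine-similarity comparison between two utterances. This is an illustration,
# not something the original script does.
import numpy as np

def cosine_similarity(a, b):
    # cosine of the angle between two embedding vectors
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8)

# e.g. compare the first two samples in the batch:
# sim = cosine_similarity(embedding[0], embedding[1])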
from tf_transformer import Transformer
from data_load import DataLoader
# import time

logging.basicConfig(level=logging.INFO)

logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
print(hp)
# save_hparams(hp, hp.logdir)

logging.info("# Prepare train/eval batches")
dataloader = DataLoader(hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab)

xs = tf.placeholder(name='xs', dtype=tf.int32, shape=[16, 100])
ys1 = tf.placeholder(name='ys1', dtype=tf.int32, shape=[16, 99])
ys2 = tf.placeholder(name='ys2', dtype=tf.int32, shape=[16, 99])

logging.info("# Load model")
m = Transformer(hp)
loss = m.train(xs, (ys1, ys2))
nonpadding = tf.to_float(tf.not_equal(ys2, dataloader.get_pad()))  # 0: <pad>
loss = tf.reduce_sum(loss * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)

global_step = tf.train.get_or_create_global_step()
optimizer = tf.train.GradientDescentOptimizer(hp.lr)
train_op = optimizer.minimize(loss, global_step=global_step)
# y_hat, eval_summaries = m.eval(xs, ys)
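# Hedged training-step sketch: the zero arrays below are stand-ins shaped to
# match the placeholders above; real batches would come from `dataloader`,
# whose batch API is not shown in this snippet.
import numpy as np

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.zeros([16, 100], dtype=np.int32)
    y1_batch = np.zeros([16, 99], dtype=np.int32)
    y2_batch = np.zeros([16, 99], dtype=np.int32)
    step_loss, _ = sess.run([loss, train_op],
                            feed_dict={xs: x_batch, ys1: y1_batch, ys2: y2_batch})
    print('step loss:', step_loss)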
    def save_model(self, path):
        self.saver.save(self.session, path)


# valid_words = [u'萧炎', u'灵魂', u'火焰', u'萧薰儿', u'药老', u'天阶', u"云岚宗", u"乌坦城", u"惊诧", u"少女"]
# valid_words = [u'斗破']
# valid_word_examples = [dictionary[li] for li in valid_words]
# valid_size = len(valid_word_examples)
#
# valid_sentence_examples = [4, 10, 11]
# valid_sentence_size = len(valid_sentence_examples)

# data_loader = DataLoader("doupocangqiong.txt", "utf8", vocabulary_size=50000, stop_word_file="stop_words.txt")
VOCA_SIZE = 10000
data_loader = DataLoader("abc_news.txt", "ascii", vocabulary_size=VOCA_SIZE)
data_loader.load()
sentence_size = len(data_loader.line_list)

# batch, labels = data_loader.generate_batch_pvdm(2, 2)
# print('batch', batch)
# print('labels', labels)

embedding_word_size = 64
embedding_sentence_size = 64
batch_size = 128
window_size = 2
num_sampled = 64
valid_sentence_examples = [1, 2]
valid_word_examples = [8, 15]
import os

import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sn

from pipeline import Pipeline
from models.cnn import CNN
from models.svm import SVM_C
from data_load import DataLoader, Dataset

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

dataloader = DataLoader(Dataset.PKU_MMD, 'PKUMMD', settings={'full': True})
X, y, z, v = dataloader.load_data(reload=False)

conf_mat = np.zeros((z, z))
cnn = CNN([], z, X[0].shape, X[1].shape)
svm = SVM_C([])
pipeline = Pipeline(X, y, z, "PKUMMD_full", cnn, svm)
results = pipeline.train(10, 10, 100)
print("Final accuracy: " + str(results[1]))

sn.heatmap(results[0], annot=True, annot_kws={"size": 10}, fmt='g')  # font size
plt.show()
from hetu import gpu_ops as ad
from hetu import optimizer
from hetu import ndarray
import numpy as np
# import time

logging.basicConfig(level=logging.INFO)

logging.info("# hparams")
hparams = Hparams()
parser = hparams.parser
hp = parser.parse_args()
print(hp)

logging.info("# Prepare train/eval batches")
dataloader = DataLoader(hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab)

ctx = ndarray.gpu(1)
xs = ad.Variable(name='xs')
ys1 = ad.Variable(name='ys1')
ys2 = ad.Variable(name='ys2')
nonpadding = ad.Variable(name='nonpadding')

logging.info("# Load model")
m = Transformer(hp)
loss = m.train(xs, (ys1, ys2))
loss = ad.div_op(ad.reduce_sum_op(loss * nonpadding, axes=[0, 1]),
                 ad.reduce_sum_op(nonpadding, axes=[0, 1]) + 1e-7)
opt = optimizer.SGDOptimizer(hp.lr)
train_op = opt.minimize(loss)
executor = ad.Executor([loss, train_op], ctx=ctx)
    else:
        print('pre_notice')
        model = module_f5()
        if torch.cuda.is_available():
            print('using cuda')
            model = model.cuda()
        model.load_state_dict(torch.load(s_model))
        model.eval()
        return model


if __name__ == '__main__':
    pre_batch_size = 1500
    pre_path = r'D:\PROJECT\mature_to_imm\data\pre_data.csv'
    pre_data_l = Im_data(in_path=pre_path)
    pre_data = DataLoader(pre_data_l, batch_size=pre_batch_size, shuffle=False)

    sl = [('105358.pkl', '105358.csv'), ('105639.pkl', '105639.csv'),
          ('105854.pkl', '105854.csv'), ('110159.pkl', '110159.csv'),
          ('110440.pkl', '110440.csv')]
    for i in range(len(sl)):
        smodel = r'model/allgene/' + sl[i][0]
        s_model = select(i, smodel)
        pre_dict(s_model=s_model, pre_data=pre_data, save_name=sl[i][1])

    # input data
    # smodel = r'D:\PROJECT\md\model\allgene\105358.pkl'
    #
    # smode2 = r'C:\Users\ZML15\Desktop\mature_to_imm\out_p\05-28_15_44P2.pkl'
    # pre_batch_size = 1500
    # pre_path = r'D:\PROJECT\mature_to_imm\data\pre_data.csv'
    # pre_data_l = Im_data(in_path=pre_path)
    # pre_data = DataLoader(pre_data_l, batch_size=pre_batch_size, shuffle=False)
    # pre_dict(s_model=smodel, pre_data=pre_data, save_name='6354.csv')
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" def savecounter(filename, arr1, arr2, arr3): with open(filename, 'wb') as fl: np.savez(fl, arr1, arr2, arr3) import seaborn as sn import matplotlib.pyplot as plt #cur_dataset = Dataset.TU #dataloader = DataLoader(cur_dataset, {'angle_limit':10, 'img_format':None}) cur_dataset = Dataset.YALE_EX_CROPPED dataloader = DataLoader(cur_dataset, {'resize': True}) X, y, z, v = dataloader.load_data(reload=False) #X = X/255.0 #z is the number of classes #v is the map of labels to which 0-num_classes correspond num_val_splits = 10 #Find optimal parameters CNN X_shape = (X.shape[1], X.shape[2]) # #CNN params # epochs_arr = [2] # batch_count_arr = [10,20,30] # conv_layers_count_arr = [1,2,3]
class TestLoad(unittest.TestCase):

  def setUp(self):  # pylint: disable=g-missing-super-call
    self.loader = DataLoader("./data/train", "./data/valid", "./data/test",
                             seq_length=512)

  def test_get_data(self):
    self.assertIsInstance(self.loader.train_data, list)
    self.assertIsInstance(self.loader.train_label, list)
    self.assertIsInstance(self.loader.valid_data, list)
    self.assertIsInstance(self.loader.valid_label, list)
    self.assertIsInstance(self.loader.test_data, list)
    self.assertIsInstance(self.loader.test_label, list)
    self.assertEqual(self.loader.train_len, len(self.loader.train_data))
    self.assertEqual(self.loader.train_len, len(self.loader.train_label))
    self.assertEqual(self.loader.valid_len, len(self.loader.valid_data))
    self.assertEqual(self.loader.valid_len, len(self.loader.valid_label))
    self.assertEqual(self.loader.test_len, len(self.loader.test_data))
    self.assertEqual(self.loader.test_len, len(self.loader.test_label))

  def test_pad(self):
    original_data1 = [[2, 3], [1, 1]]
    expected_data1_0 = [[2, 3], [2, 3], [2, 3], [2, 3], [1, 1]]
    expected_data1_1 = [[2, 3], [1, 1], [1, 1], [1, 1], [1, 1]]
    original_data2 = [[-2, 3], [-77, -681], [5, 6], [9, -7], [22, 3333],
                      [9, 99], [-100, 0]]
    expected_data2 = [[-2, 3], [-77, -681], [5, 6], [9, -7], [22, 3333]]
    padding_data1 = self.loader.pad(original_data1, seq_length=5, dim=2)
    padding_data2 = self.loader.pad(original_data2, seq_length=5, dim=2)
    for i in range(len(padding_data1[0])):
      for j in range(len(padding_data1[0].tolist()[0])):
        self.assertLess(
            abs(padding_data1[0].tolist()[i][j] - expected_data1_0[i][j]),
            10.001)
    for i in range(len(padding_data1[1])):
      for j in range(len(padding_data1[1].tolist()[0])):
        self.assertLess(
            abs(padding_data1[1].tolist()[i][j] - expected_data1_1[i][j]),
            10.001)
    self.assertEqual(padding_data2[0].tolist(), expected_data2)
    self.assertEqual(padding_data2[1].tolist(), expected_data2)

  def test_format(self):
    self.loader.format()
    expected_train_label = int(self.loader.label2id[self.loader.train_label[0]])
    expected_valid_label = int(self.loader.label2id[self.loader.valid_label[0]])
    expected_test_label = int(self.loader.label2id[self.loader.test_label[0]])
    for feature, label in self.loader.train_data:  # pylint: disable=unused-variable
      format_train_label = label.numpy()
      break
    for feature, label in self.loader.valid_data:
      format_valid_label = label.numpy()
      break
    for feature, label in self.loader.test_data:
      format_test_label = label.numpy()
      break
    self.assertEqual(expected_train_label, format_train_label)
    self.assertEqual(expected_valid_label, format_valid_label)
    self.assertEqual(expected_test_label, format_test_label)
    self.assertIsInstance(self.loader.train_data, tf.data.Dataset)
    self.assertIsInstance(self.loader.valid_data, tf.data.Dataset)
    self.assertIsInstance(self.loader.test_data, tf.data.Dataset)
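# Standard unittest entry point so the test file can be run directly; the
# snippet ends before any footer, so this follows the usual convention rather
# than confirmed source.
if __name__ == '__main__':
  unittest.main()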
#!/usr/bin/env python
import argparse
import multiprocessing

from tensorpack.dataflow.remote import send_dataflow_zmq

from data_load import DataLoader, AudioMeta
from hparam import hparam as hp

if __name__ == '__main__':
    # get arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('case', type=str, help='experiment case name.')
    parser.add_argument('-data_path', type=str)
    parser.add_argument('-dest_url', type=str)
    parser.add_argument('-num_thread', type=int, default=1)
    args = parser.parse_args()

    # set hyper-parameters from yaml file
    hp.set_hparam_yaml(case=args.case)
    if args.data_path:
        hp.train.data_path = args.data_path

    # dataflow
    audio_meta = AudioMeta(hp.train.data_path)
    data_loader = DataLoader(audio_meta, 1)
    num_thread = (args.num_thread if args.num_thread
                  else int(multiprocessing.cpu_count() // 1.5))
    data_loader = data_loader.dataflow(nr_prefetch=5000, nr_thread=num_thread)

    send_dataflow_zmq(data_loader, args.dest_url)
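    # Hedged note: on the consuming side, tensorpack's RemoteDataZMQ can attach
    # to the same URL to receive this stream (cf. the `-remote` branch of the
    # training script above); the exact receiver setup is an assumption.
    # from tensorpack.dataflow.remote import RemoteDataZMQ
    # df = RemoteDataZMQ(args.dest_url)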