# Imports assumed by the snippets in this file (TF1.x + tensorpack); the
# project-specific symbols (Net1, Net1DataFlow, hp, preprocessing, load_vocab,
# phns, plot_confusion_matrix, get_eval_input_names, get_eval_output_names,
# data_path, test_path) come from elsewhere in the repo.
import os

import tensorflow as tf
from tensorpack import (AutoResumeTrainConfig, InferenceRunner, ModelSaver,
                        OfflinePredictor, PredictConfig, QueueInput, ScalarStats,
                        SimpleTrainer, SyncMultiGPUTrainerReplicated, TrainConfig,
                        launch_train_with_config)
from tensorpack.tfutils.sessinit import SaverRestore
from tensorpack.utils import logger


def train(args, logdir):
    # model
    model = Net1()

    preprocessing(data_path)
    preprocessing(test_path)

    # dataflow
    df = Net1DataFlow(data_path, hp.train1.batch_size)
    df_test = Net1DataFlow(test_path, hp.train1.batch_size)

    # datas = df.get_data()
    # print(datas[1])

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    # session_conf = tf.ConfigProto(
    #     gpu_options=tf.GPUOptions(
    #         allow_growth=True,
    #     ),)

    # cv test code
    # https://github.com/tensorpack/tensorpack/blob/master/examples/boilerplate.py
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=hp.train1.batch_size * 10, n_thread=1)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            InferenceRunner(df_test(n_prefetch=1),
                            ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    num_gpu = hp.train1.num_gpu
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))
        num_gpu = len(args.gpu.split(','))
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
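
# A hypothetical helper (not in the original file) that expands the commented-out
# dataflow check inside train() above; it assumes Net1DataFlow yields
# (x_mfccs, y_ppgs) batches, as eval() below does.
def check_dataflow(path, batch_size):
    df_check = Net1DataFlow(path, batch_size)(n_prefetch=1, n_thread=1)
    df_check.reset_state()
    x_mfccs, y_ppgs = next(df_check.get_data())
    print('x_mfccs:', x_mfccs.shape, 'y_ppgs:', y_ppgs.shape)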

def eval(logdir):
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)

    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
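
# A sketch of what plot_confusion_matrix() is assumed to do in eval() above (the
# project's actual helper may differ): build a sklearn confusion matrix over the
# phoneme labels, render it with matplotlib, and wrap the PNG in a TF1 Summary
# proto that FileWriter.add_summary() accepts.
import io

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix_sketch(y_true, y_pred, labels, tag='net1/eval/confusion_matrix'):
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(cm, cmap='Blues')
    ax.set_xticks(range(len(labels)))
    ax.set_yticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=90, fontsize=5)
    ax.set_yticklabels(labels, fontsize=5)
    ax.set_xlabel('predicted phoneme')
    ax.set_ylabel('true phoneme')

    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    plt.close(fig)

    img = tf.Summary.Image(encoded_image_string=buf.getvalue())
    return tf.Summary(value=[tf.Summary.Value(tag=tag, image=img)])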

def train(args, logdir):
    # model
    print("####model")
    model = Net1()

    # dataflow
    print("####dataflow")
    df = Net1DataFlow(hp.Train1.data_path, hp.Train1.batch_size)

    # set logger for event and model saver
    print("####logger")
    logger.set_logger_dir(logdir)

    print("####session_conf")
    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
        ),
        allow_soft_placement=True)

    print("####train_conf")
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=5)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.Train1.num_epochs,
        steps_per_epoch=hp.Train1.steps_per_epoch,
        session_config=session_conf)

    print("####ckpt")
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    print("####trainer")
    trainer = SyncMultiGPUTrainerReplicated(hp.Train1.num_gpu)

    print("####launch_train_with_config")
    launch_train_with_config(train_conf, trainer=trainer)
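
# A minimal sketch for the "TODO EvalCallback()" above (an assumption, not the
# project's actual callback): run the eval tensors over a held-out dataflow at the
# end of every epoch and log the mean loss/accuracy. The tensor names follow the
# ScalarStats used in the first train() variant; df_eval is a hypothetical
# Net1DataFlow over the test set.
from tensorpack import Callback


class EvalCallback(Callback):
    def __init__(self, df_eval):
        self.df_eval = df_eval

    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            get_eval_input_names(), ['net1/eval/loss', 'net1/eval/acc'])

    def _trigger_epoch(self):
        self.df_eval.reset_state()
        losses, accs = [], []
        for x_mfccs, y_ppgs in self.df_eval.get_data():
            loss, acc = self.pred(x_mfccs, y_ppgs)
            losses.append(loss)
            accs.append(acc)
        self.trainer.monitors.put_scalar('eval/loss', sum(losses) / len(losses))
        self.trainer.monitors.put_scalar('eval/acc', sum(accs) / len(accs))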

def train(args, logdir):
    # model
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.train1.data_path, hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
        ),
    )

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SimpleTrainer()
    # print('test stop')

    launch_train_with_config(train_conf, trainer=trainer)
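
# A hypothetical command-line entry point for the train() variants above; the flag
# names (--logdir, --ckpt, --gpu) are assumptions, not the project's actual CLI.
import argparse


def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', type=str, required=True,
                        help='directory for checkpoints and TensorBoard events')
    parser.add_argument('--ckpt', type=str, default=None,
                        help='checkpoint name to restore, relative to logdir')
    parser.add_argument('--gpu', type=str, default=None,
                        help='comma-separated GPU ids, e.g. "0,1"')
    return parser.parse_args()


if __name__ == '__main__':
    args = get_arguments()
    train(args, logdir=args.logdir)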