def main(cfg):
    print(cfg)
    tf.reset_default_graph()
    # action='d' deletes any existing log dir before training starts
    logger.set_logger_dir('tflogs', action='d')

    # snapshot the source files into the run dir (Hydra changes the cwd per run)
    copyfile(hydra.utils.to_absolute_path('model.py'), 'model.py')
    copyfile(hydra.utils.to_absolute_path('dataflow.py'), 'dataflow.py')

    # dataflow: SMPL or ShapeNet, each paired with a visualizing validation flow
    if cfg.cat_name == 'smpl':
        train_df = SMPLDataFlow(cfg, True, 1000)
        val_df = VisSMPLDataFlow(cfg, True, 1000, port=1080)
    else:
        train_df = ShapeNetDataFlow(cfg, cfg.data.train_txt, True)
        val_df = VisDataFlow(cfg, cfg.data.val_txt, False, port=1080)

    config = TrainConfig(
        model=Model(cfg),
        dataflow=BatchData(
            PrefetchData(train_df, cpu_count() // 2, cpu_count() // 2),
            cfg.batch_size),
        callbacks=[
            ModelSaver(),
            SimpleMovingAverage(
                ['recon_loss', 'GAN/loss_d', 'GAN/loss_g', 'GAN/gp_loss', 'symmetry_loss'],
                100),
            PeriodicTrigger(val_df, every_k_steps=30),
        ],
        monitors=tensorpack.train.DEFAULT_MONITORS() + [ScalarPrinter(enable_step=True, enable_epoch=False)],
        max_epoch=10,
    )
    launch_train_with_config(config, SimpleTrainer())
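# How main(cfg) above gets its cfg: a minimal sketch of the Hydra entrypoint this
# function presumably sits behind. Hydra switches the working directory to a
# per-run output dir, which is why main() resolves model.py/dataflow.py through
# hydra.utils.to_absolute_path before copying them. The config_path and
# config_name values here are assumptions, not taken from the original project.
import hydra

@hydra.main(config_path='conf', config_name='config')
def hydra_entry(cfg):
    main(cfg)

if __name__ == '__main__':
    hydra_entry()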
def train(args, logdir):
    # model
    model = Net1()

    # preprocess raw data into features
    preprocessing(data_path)
    preprocessing(test_path)

    # dataflow
    df = Net1DataFlow(data_path, hp.train1.batch_size)
    df_test = Net1DataFlow(test_path, hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    # see the tensorpack boilerplate example:
    # https://github.com/tensorpack/tensorpack/blob/master/examples/boilerplate.py
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=hp.train1.batch_size * 10, n_thread=1)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            InferenceRunner(
                df_test(n_prefetch=1),
                ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
    )

    # resume from an explicit checkpoint if given, else from the latest one
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    num_gpu = hp.train1.num_gpu
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        num_gpu = len(args.gpu.split(','))
        train_conf.nr_tower = num_gpu
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
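# Hedged sketch of the CLI wrapper that train(args, logdir) above expects: an args
# namespace carrying .ckpt (checkpoint basename) and .gpu (comma-separated device
# ids). The flag names, default case name, and logdir layout are illustrative
# assumptions, not taken from the original source.
import argparse

def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('-case', type=str, default='default', help='experiment case name')
    parser.add_argument('-ckpt', help='checkpoint basename to restore, e.g. "model-100"')
    parser.add_argument('-gpu', help='comma-separated GPU ids, e.g. "0,1"')
    return parser.parse_args()

if __name__ == '__main__':
    args = get_arguments()
    train(args, logdir='logdir/{}/train1'.format(args.case))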
def train(args, logdir1, logdir2):
    # model
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.train2.data_path, hp.train2.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir2)

    session_conf = tf.ConfigProto(
        # log_device_placement=True,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            # allow_growth=True,
            per_process_gpu_memory_fraction=0.6,
        ),
    )

    # Restore order matters: ChainInit applies the inits in sequence, so a resumed
    # Net2 checkpoint (if any) is loaded first, then the pretrained Net1 weights
    # are layered over the shared Net1 variables, with global_step excluded so the
    # resumed step count is kept.
    session_inits = []
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO: save with prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits),
        session_config=session_conf,
    )

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    # trainer = SyncMultiGPUTrainerParameterServer(hp.train2.num_gpu)
    trainer = SimpleTrainer()
    launch_train_with_config(train_conf, trainer=trainer)
def train(args, logdir):
    # model
    model = Net1()

    # dataflow: preprocessed TIMIT utterances stored as .npz
    TIMIT_TRAIN_WAV = 'TIMIT/TRAIN/*/*/*.npz'
    TIMIT_TEST_WAV = 'TIMIT/TEST/*/*/*.npz'
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TRAIN_WAV))
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TEST_WAV))
    df = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TRAIN_WAV),
                      hp.train1.batch_size)
    df_test = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TEST_WAV),
                           hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=8)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            InferenceRunner(
                df_test(n_prefetch=1),
                ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
    )

    # resume from an explicit checkpoint if given, else from the latest one
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if hp.default.use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = hp.default.gpu_list
        num_gpu = len(hp.default.gpu_list.split(','))
        train_conf.nr_tower = num_gpu
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def train(args, logdir):
    # model
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.train1.data_path, hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    # grows GPU memory on demand if passed to TrainConfig below
    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
    )

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )

    # resume from an explicit checkpoint if given, else from the latest one
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SimpleTrainer()
    launch_train_with_config(train_conf, trainer=trainer)
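# A possible stand-in for the "TODO EvalCallback()" above, mirroring what the other
# Net1 training variants in this collection do: run inference on a held-out dataflow
# and log scalar stats. hp.train1.eval_data_path is a hypothetical hyperparameter;
# substitute whatever holds the evaluation file pattern.
def make_eval_callback():  # hypothetical helper
    eval_df = Net1DataFlow(hp.train1.eval_data_path, hp.train1.batch_size)
    return InferenceRunner(
        eval_df(n_prefetch=1),
        ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix=''))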
            # TODO GenerateCallback()
        ],
        max_epoch=hp.train.num_epochs,
        steps_per_epoch=hp.train.steps_per_epoch,
    )

    ckpt = '{}/{}'.format(hp.logdir, ckpt) if ckpt else tf.train.latest_checkpoint(hp.logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    if gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpu))
        train_conf.nr_tower = len(gpu)

    if hp.train.num_gpu <= 1:
        trainer = SimpleTrainer()
    else:
        trainer = SyncMultiGPUTrainerReplicated(gpus=hp.train.num_gpu)

    launch_train_with_config(train_conf, trainer=trainer)


if __name__ == '__main__':
    fire.Fire(train)


# class GenerateCallback(Callback):
#     def _setup_graph(self):
#         self.generator = self.trainer.get_predictor(
#             get_eval_input_names(),
#             get_eval_output_names())
#         self.df = DataFlow(hp.data_path, hp.generate.batch_size)
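# Hedged completion of the GenerateCallback sketched in the comments above: a
# tensorpack Callback that builds a predictor from the training graph and runs one
# generation batch per epoch. get_eval_input_names/get_eval_output_names and the
# DataFlow constructor are assumed to exist as in the commented-out draft.
from tensorpack.callbacks import Callback

class GenerateCallback(Callback):
    def _setup_graph(self):
        # predictor sharing the trainer's session and weights
        self.generator = self.trainer.get_predictor(
            get_eval_input_names(),
            get_eval_output_names())
        self.df = DataFlow(hp.data_path, hp.generate.batch_size)
        self.df.reset_state()

    def _trigger_epoch(self):
        # run a single generation batch at the end of each epoch
        for dp in self.df:
            self.generator(*dp)
            break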