def do_convert(logdir1, logdir2, input_path, output_path):
    """Convert the audio file at `input_path` with Net1/Net2 checkpoints and write a wav.

    :param logdir1: checkpoint directory of the Net1 (phoneme classifier) model.
    :param logdir2: checkpoint directory of the Net2 (synthesis) model.
    :param input_path: path to the source audio file; must be non-empty.
    :param output_path: destination path for the converted wav.
    """
    # Load graph
    model = Net2()
    model.actual_duration = librosa.core.get_duration(filename=input_path, sr=hp.default.sr)

    # TODO isolate out logdirs, uhh and how to pre-dl from s3?
    assert len(input_path) > 0, "must be non-empty input path"

    df = Net2DataFlow(data_path=input_path, batch_size=1)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    # FIX: latest_checkpoint returns None when a directory holds no checkpoint;
    # the original passed that None straight into SaverRestore and crashed.
    # Guarding matches the sibling do_convert/eval functions in this file.
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Net1 weights are restored without its training step counter.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)
    write_wav(audio[0], hp.default.sr, output_path)
def do_convert(args, logdir1, logdir2):
    """Convert a batch of audio with trained Net1/Net2 models and log results as TF summaries.

    :param args: parsed CLI args; only `args.ckpt` (optional Net2 checkpoint
        name inside `logdir2`) is read here.
    :param logdir1: checkpoint directory of the Net1 model.
    :param logdir2: checkpoint directory of the Net2 model; summaries are written here.
    """
    # Load graph
    model = Net2()

    df = Net2DataFlow(hp.convert.data_path, hp.convert.batch_size)

    # An explicit --ckpt selects a specific Net2 checkpoint file; otherwise the
    # latest checkpoint in logdir2 is used. Net1 always uses its latest one.
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore Net1 variables but keep Net2's global_step.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    # Evaluate the merged summaries in a fresh session, then flush to disk.
    writer = tf.summary.FileWriter(logdir2)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
def eval(logdir1, logdir2):
    """Run one evaluation batch through Net2 and log its loss summary to `logdir2`.

    NOTE: the name shadows the `eval` builtin; kept as-is for caller compatibility.

    :param logdir1: checkpoint directory of the Net1 model.
    :param logdir2: checkpoint directory of the Net2 model; summary written here.
    """
    # Load graph
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.test2.data_path, hp.test2.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore Net1 variables but keep Net2's global_step.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # A single batch is evaluated; the predictor returns a serialized summary.
    # x_mfccs, y_spec, _ = next(df().get_data())
    summ_loss, = predictor(next(df().get_data()))

    writer = tf.summary.FileWriter(logdir2)
    writer.add_summary(summ_loss)
    writer.close()
def set_enviroment(args, logdir1, logdir2):
    """Build and return an OfflinePredictor restored from Net1/Net2 checkpoints.

    NOTE: the name misspells "environment"; kept as-is so existing callers work.

    :param args: parsed CLI args; reads `args.ckpt` (optional Net2 checkpoint
        name) and `args.gpu` (CUDA_VISIBLE_DEVICES value, CPU-only if falsy).
    :param logdir1: checkpoint directory of the Net1 model.
    :param logdir2: checkpoint directory of the Net2 model.
    :return: an OfflinePredictor ready for conversion.
    """
    # Load graph
    set_env_s = datetime.datetime.now()  # start of setup timing

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)

    model = Net2ForConvert()

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore Net1 variables but keep Net2's global_step.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    # '-1' hides all GPUs from TensorFlow, forcing CPU execution.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits)
    )
    print("OfflinePredictor")
    predictor = OfflinePredictor(pred_conf)

    set_env_e = datetime.datetime.now()
    set_env_t = set_env_e - set_env_s
    print("Setting Environment time:{}s".format(set_env_t.seconds))

    return predictor
def train(args, logdir1, logdir2):
    """Train Net2, warm-starting from Net1 (`logdir1`) and Net2 (`logdir2`) checkpoints.

    Fixes vs. the original:
    - the first header print used ``"\\t\\data_path"`` (literal backslash-d,
      clearly meant to be two tabs like the lines below it);
    - ``gpu_list`` was only bound inside ``if args.gpu:`` yet used
      unconditionally to build the trainer, so running without ``--gpu``
      raised NameError. A CPU SimpleTrainer is now used in that case.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir1: Net1 checkpoint directory (restored without global_step).
    :param logdir2: Net2 checkpoint/log directory.
    """
    # model
    model = Net2()

    preprocessing(data_path, logdir2)

    # dataflow
    df = Net2DataFlow(data_path, hp.train2.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir2)

    # session_conf = tf.ConfigProto(
    #     gpu_options=tf.GPUOptions(
    #         allow_growth=True,
    #         per_process_gpu_memory_fraction=0.6,
    #     ),
    # )

    dataset_size = len(glob.glob(data_path + '/wav/*.wav'))
    print("\t\tdata_path : ", data_path)
    print("\t\tDataset Size : ", dataset_size)
    print("\t\tBatch Size : ", hp.train2.batch_size)
    print("\t\tSteps per epoch : ", (dataset_size // hp.train2.batch_size))
    from time import sleep
    sleep(10)  # pause so the summary above can be read before training starts

    session_inits = []
    # Explicit --ckpt selects a specific Net2 checkpoint; otherwise the latest.
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    # Net1 is restored without its training step counter.
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=8)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=dataset_size // hp.train2.batch_size,
        session_init=ChainInit(session_inits))

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        gpu_list = list(map(int, args.gpu.split(',')))
        train_conf.nr_tower = len(gpu_list)
        #trainer = SimpleTrainer()
        trainer = SyncMultiGPUTrainerReplicated(gpu_list)
        #trainer = AsyncMultiGPUTrainer(gpu_list, False)
    else:
        # Original code hit a NameError here (gpu_list undefined); run on CPU.
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def do_convert(args, logdir1, logdir2):
    """Convert a single audio file (`args.file`) and save wav + TF summaries to `args.savepath`.

    :param args: parsed CLI args; reads `args.file` (input audio path),
        `args.net1` / `args.net2` (optional explicit checkpoint names) and
        `args.savepath` (output directory-like prefix).
    :param logdir1: checkpoint directory of the Net1 model.
    :param logdir2: checkpoint directory of the Net2 model.
    """
    # Load graph
    model = Net2()

    data = get_mfccs_and_spectrogram(args.file)

    # Explicit checkpoint names win over the latest checkpoint in each dir.
    ckpt1 = '{}/{}'.format(logdir1, args.net1) if args.net1 else tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.net2) if args.net2 else tf.train.latest_checkpoint(logdir2)

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore Net1 variables but keep Net2's global_step.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, data)

    # Output keeps the input's basename with a .wav extension.
    # NOTE(review): `args.savepath + converted_file` is plain string
    # concatenation — savepath presumably ends with '/'; confirm at call site.
    target_file = args.file.split('/')[-1]
    portion = os.path.splitext(target_file)
    # converted_file = target_file.split('.')[0] + '_converted.wav'
    converted_file = portion[0] + '.wav'
    write_wav(audio[0], hp.Default.sr, args.savepath + converted_file)

    # Write the result
    tf.summary.audio('A', y_audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)
    tf.summary.audio('B', audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    # Evaluate merged summaries in a fresh session and flush them to disk.
    writer = tf.summary.FileWriter(args.savepath)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
def critic_predict_dataflow(ctrl, data, log_dir, model_dir, vs_name):
    """ Prediction on a dataflow, used for testing a large batch of data """
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    # Without a trained model there is nothing to score: emit one zero per
    # sample so the caller keeps its original ordering.
    if not latest_ckpt:
        logger.info("No model exists. Do not sort")
        return [0] * len(data[0])

    critic = critic_factory(ctrl, is_train=False, vs_name=vs_name)
    val_flow = critic_dataflow_factory(ctrl, data, is_train=False)
    predicted_names = ['{}/predicted_accuracy:0'.format(vs_name)]

    # LSTM critics are pinned to the CPU.
    if ctrl.critic_type == CriticTypes.LSTM:
        sess_cfg = tf.ConfigProto(device_count={'GPU': 0})
    else:
        sess_cfg = None

    cfg = PredictConfig(
        model=critic,
        input_names=critic.input_names,
        output_names=predicted_names,
        session_creator=NewSessionCreator(config=sess_cfg),
        session_init=SaverRestore(latest_ckpt))
    #with tf.Graph().as_default():
    runner = SimpleDatasetPredictor(cfg, val_flow)

    scores = []
    for batch_out in runner.get_result():
        scores.extend(batch_out[0])
    return scores
def train(args, logdir):
    """Partially disabled Net1 training setup.

    NOTE(review): most of this body is commented out (`# ;` lines): no model
    or dataflow is built, `session_conf` is created but never passed to the
    TrainConfig, and nothing here ever launches training — this function only
    sets up the logger and a (mostly empty) TrainConfig.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir: log/checkpoint directory.
    """
    # model
    # ;model = Net1()
    # dataflow
    # ;df = Net1DataFlow(hp.train1.data_path, hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)
    # Built but currently unused (the session_config line below is commented out).
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True,
    ), )
    train_conf = TrainConfig(
        # ;model=model,
        # ;data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        # ;max_epoch=hp.train1.num_epochs,
        # ;steps_per_epoch=hp.train1.steps_per_epoch,
        # session_config=session_conf
    )
    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))
def get_config(X_train, X_valid, y_train, y_valid, model_path=None):
    """Build the TrainConfig for the model: dataflow, callbacks, LR schedule.

    :param X_train: training inputs, forwarded to `get_data`.
    :param X_valid: validation inputs.
    :param y_train: training targets.
    :param y_valid: validation targets.
    :param model_path: optional checkpoint path to restore weights from.
    :return: a configured TrainConfig.
    """
    data_train, data_valid = get_data(X_train, X_valid, y_train, y_valid)
    # One epoch walks the whole training dataflow once.
    steps_per_epoch = data_train.size()
    cur_loss = 'tot_loss'
    triggerk = 15  # checkpoint / min-saver cadence (epochs)
    visualk = 5    # visualization cadence (epochs)
    config = TrainConfig(
        model=Model(),
        dataflow=data_train,
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=triggerk),
            PeriodicTrigger(MinSaver('validation_' + cur_loss), every_k_epochs=triggerk),
            # Piecewise-linear LR decay from 2e-4 down to 1e-6 over 800 epochs.
            ScheduledHyperParamSetter(
                'learning_rate',
                [(0, 2e-4), (150, 1e-4), (300, 5e-5), (600, 1e-5), (800, 1e-6)],
                interp='linear'),
            PeriodicTrigger(VisualizeRunner(), every_k_epochs=visualk),
            PeriodicTrigger(InferenceRunner(data_valid, [ScalarStats(cur_loss)]),
                            every_k_epochs=5)
        ],
        # `is not None` (PEP 8) instead of the original `!= None`.
        session_init=SaverRestore(model_path) if model_path is not None else None,
        # session_config=session_config,
        steps_per_epoch=steps_per_epoch,
        max_epoch=2000)
    return config
def eval(logdir):
    """Evaluate one Net1 batch and log loss, accuracy, and a confusion matrix.

    NOTE: the name shadows the `eval` builtin; kept for caller compatibility.

    :param logdir: Net1 checkpoint directory; summaries are also written here.
    """
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)

    predictor = OfflinePredictor(pred_conf)

    # Single batch: predictor returns flattened phoneme ids plus two summaries.
    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    # Map integer phoneme indices back to phoneme names for readable axes.
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
def do_convert(args, logdir):
    """Convert a batch with a single trained Net, write a/b wavs and TF summaries.

    :param args: parsed CLI args; reads `args.ckpt` (optional checkpoint name).
    :param logdir: checkpoint directory; summaries are written here too.
    """
    # Load graph
    model = Net()

    df = NetDataFlow(hp.convert.data_path, hp.convert.batch_size)

    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    session_inits = []
    if ckpt:
        session_inits.append(SaverRestore(ckpt))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio = convert(predictor, df)

    # Dump the first reference and converted samples to fixed-name files
    # in the current working directory.
    soundfile.write("a.wav", y_audio[0], 16000, format="wav", subtype="PCM_16")
    soundfile.write("b.wav", audio[0], 16000, format="wav", subtype="PCM_16")

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    # Evaluate merged summaries in a fresh session and flush them to disk.
    writer = tf.summary.FileWriter(logdir)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
def train(args, logdir1, logdir2):
    """Train Net2, warm-starting from Net1 (`logdir1`) and Net2 (`logdir2`) checkpoints.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir1: Net1 checkpoint directory (restored without global_step).
    :param logdir2: Net2 checkpoint/log directory.
    """
    # model
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.train2.data_path, hp.train2.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir2)

    session_conf = tf.ConfigProto(
        # log_device_placement=True,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            # allow_growth=True,
            per_process_gpu_memory_fraction=0.6,
        ),
    )

    session_inits = []
    # Explicit --ckpt selects a specific Net2 checkpoint; otherwise the latest.
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    # Net1 is restored without its training step counter.
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits),
        session_config=session_conf
    )
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    # NOTE(review): nr_tower may be >1 above, but a single-device trainer
    # is used here — confirm whether multi-GPU training was intended.
    #trainer = SyncMultiGPUTrainerParameterServer(hp.train2.num_gpu)
    trainer = SimpleTrainer()
    launch_train_with_config(train_conf, trainer=trainer)
def pred_config(self, args, df, callbacks) -> TrainConfig:
    """Assemble a TrainConfig from CLI args, a dataflow, and callbacks.

    :param args: parsed CLI args; reads `args.epochs`, `args.steps`, `args.load`.
    :param df: input dataflow, wrapped for queued + staged feeding.
    :param callbacks: tensorpack callbacks to attach.
    """
    # Restore from a checkpoint only when one was supplied on the command line.
    restore = SaverRestore(args.load) if args.load else None
    staged_input = StagingInput(QueueInput(df))
    return TrainConfig(
        model=self.train_model(args),
        data=staged_input,
        callbacks=callbacks,
        max_epoch=args.epochs,
        steps_per_epoch=args.steps,
        session_init=restore,
    )
def do_convert(args, logdir1, logdir2, input_dir):
    """Batch-convert every audio file under `input_dir`, mirroring the tree
    into the converted base dir and skipping files already converted.

    :param args: parsed CLI args; reads `args.ckpt` (optional Net2 checkpoint name).
    :param logdir1: checkpoint directory of the Net1 model.
    :param logdir2: checkpoint directory of the Net2 model.
    :param input_dir: root directory of the source audio files.
    """
    # Load graph
    model = Net2()

    # input_dir = hp.convert.data_base_dir_original + hp.convert.data_path
    df = Net2DataFlow(input_dir, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore Net1 variables but keep Net2's global_step.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # loop over all the audio files
    for wav_file in df.wav_files:
        # check if file is present audio
        out_path = wav_file.replace(hp.convert.data_base_dir_original,
                                    hp.convert.data_base_dir_convert)
        # change file extension from wv1/wv2 to wav
        # (slices off the last two chars and appends 'av', e.g. '.wv1' -> '.wav';
        #  assumes every input path ends in a 3-char 'wv?' extension)
        out_path = out_path[:-2] + 'av'
        if os.path.isfile(out_path):
            # file is already present, move on to the next one.
            print("skipping " + wav_file)
            continue
        print("converting " + wav_file)

        # convert audio
        # Feature chunks are converted one at a time, scaled to int16 range,
        # then concatenated and trimmed back to the original sample count.
        audio_len, feats = df.get_features(wav_file)
        audio_full = []
        for feat in feats:
            input_arr = ([feat[0]], [feat[1]], [feat[2]])
            audio, ppgs = convert(predictor, input_arr)
            audio_full.append(
                (audio[0] * hp.convert.amplitude_multiplier).astype(np.int16))
        scipy.io.wavfile.write(out_path, hp.default.sr,
                               np.concatenate(audio_full)[:audio_len])
def compute_accuracy(model, mel_spec, speaker_id, ckpt=None):
    """Run a single accuracy evaluation of `model` on one (mel_spec, speaker_id) batch.

    :param model: model whose eval graph exposes an accuracy output.
    :param mel_spec: mel-spectrogram input batch.
    :param speaker_id: ground-truth speaker labels.
    :param ckpt: optional checkpoint to restore before predicting.
    :return: the scalar accuracy produced by the predictor.
    """
    # Only restore weights when a checkpoint was actually provided.
    restore = SaverRestore(ckpt) if ckpt else None
    predictor = OfflinePredictor(PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=restore))
    (acc,) = predictor(mel_spec, speaker_id)
    return acc
def train(args, logdir):
    """Preprocess data, then train Net1 with periodic eval on the test split.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir: log/checkpoint directory.
    """
    # model
    model = Net1()

    # `data_path` / `test_path` are module-level globals.
    preprocessing(data_path)
    preprocessing(test_path)

    # dataflow
    df = Net1DataFlow(data_path, hp.train1.batch_size)
    df_test = Net1DataFlow(test_path, hp.train1.batch_size)
    #datas = df.get_data()
    #print(datas[1])

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    #session_conf = tf.ConfigProto(
    #    gpu_options=tf.GPUOptions(
    #        allow_growth=True,
    #    ),)

    # cv test code
    # https://github.com/tensorpack/tensorpack/blob/master/examples/boilerplate.py
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=hp.train1.batch_size * 10, n_thread=1)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # Run loss/accuracy inference on the test dataflow each epoch.
            InferenceRunner(
                df_test(n_prefetch=1),
                ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        #session_config=session_conf
    )
    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)

    num_gpu = hp.train1.num_gpu
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)

    # --gpu overrides the configured GPU count; otherwise train on CPU.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))
        num_gpu = len(args.gpu.split(','))
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def inference_config(self, args) -> PredictConfig:
    """Build a PredictConfig that restores the best (min-loss) checkpoint.

    FIX: the return annotation previously said ``TrainConfig`` although the
    body constructs and returns a ``PredictConfig``.

    :param args: parsed CLI args; reads `args.validation` and `args.save`.
    :return: a PredictConfig restoring the MinSaver checkpoint.
    """
    # Prefer the validation loss when a validation set was used.
    loss_name = (self.validation_total_cost_var
                 if args.validation is not None else self.total_cost_var)
    # MinSaver writes the best checkpoint under this fixed file name.
    min_file = os.path.join(args.save, (f"min-{loss_name}.data-00000-of-00001"))
    model = self.inference_model(args)
    return PredictConfig(
        model=model,
        input_names=[i.name for i in model.inputs()],
        output_names=model.outputs(),
        session_init=SaverRestore(min_file),
    )
def run(args):
    """Train the avatar-synthesis model, optionally resuming from a checkpoint.

    :param args: parsed CLI args; reads `args.load_path` plus whatever
        `get_config` / `AvatarSynthModel` consume.
    """
    gpu_count = get_nr_gpu()
    # Always build at least one tower, even on CPU-only machines.
    tower_count = max(gpu_count, 1)

    train_cfg = get_config(args, AvatarSynthModel(args), gpu_count, tower_count)
    if args.load_path:
        # Resume weights from an explicit checkpoint path.
        train_cfg.session_init = SaverRestore(args.load_path)

    # trainer = SyncMultiGPUTrainerParameterServer(num_towers)
    # trainer = QueueInputTrainer()
    launch_train_with_config(train_cfg, SyncMultiGPUTrainerReplicated(tower_count))
def run_atari_neptune_experiment(yamlFile=None, modelToLaod=None, epoch=None):
    """Load an experiment description from YAML, build its model, and run a submission.

    NOTE: Python 2 code (bare `print` statement below). The parameter name
    `modelToLaod` misspells "load" but is kept for caller compatibility.

    :param yamlFile: path to a YAML file with a `parameters` list of
        {name, default} entries.
    :param modelToLaod: checkpoint path restored into the predictor.
    :param epoch: used only to name the dump subdirectory.
    """
    global ENV_NAME, EXPERIMENT_MODEL, FRAME_HISTORY
    with open(yamlFile, 'r') as stream:
        try:
            # HACK: yaml.load without an explicit Loader executes arbitrary
            # tags — unsafe on untrusted files; safe_load would be preferable.
            yamlData = yaml.load(stream)
        except yaml.YAMLError as exc:
            print(exc)
    # Flatten the parameter list into name -> default value.
    argsDict = {}
    for v in yamlData["parameters"]:
        argsDict[v["name"]] = v["default"]
    args = edict(argsDict)
    ENV_NAME = args.env
    assert ENV_NAME
    if hasattr(args, "frame_history"):
        FRAME_HISTORY = args.frame_history
        # examples.OpenAIGym.train_atari_with_neptune.FRAME_HISTORY = args.frame_history
    else:
        FRAME_HISTORY = 4
    # FRAME_HISTORY = int(get_atribute(args, "frame_history", 4))
    logger.info("Environment Name: {}".format(ENV_NAME))
    # module_name, function_name = ctx.params.featureExtractor.split(".")
    # Split "pkg.module.ClassName" into module path and class name, then
    # import the module and look the class up dynamically.
    module_name = args.experimentModelClass[:args.experimentModelClass.rfind('.')]
    class_name = args.experimentModelClass[args.experimentModelClass.rfind('.') + 1:]
    experiment_model_class = importlib.import_module(module_name).__dict__[class_name]
    EXPERIMENT_MODEL = experiment_model_class(args.experimentModelParameters)
    # Creating (and discarding) a player populates the global NUM_ACTIONS.
    p = get_player()
    del p  # set NUM_ACTIONS. Bloody hack!
    EXPERIMENT_MODEL.set_number_of_actions(NUM_ACTIONS)
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cfg = PredictConfig(model=EXPERIMENT_MODEL,
                        session_init=SaverRestore(modelToLaod),
                        input_var_names=['state'],
                        output_var_names=['logits'])
    dump_dir = os.path.join(dump_dir_root, str(epoch))
    print "Writing to:{}".format(dump_dir)
    run_submission(cfg, dump_dir)
def train(args, logdir):
    """Train Net1, tracing each setup stage with `####`-prefixed prints.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir: log/checkpoint directory.
    """
    # model
    print("####model")
    model = Net1()

    # dataflow
    print("####dataflow")
    df = Net1DataFlow(hp.Train1.data_path, hp.Train1.batch_size)

    # set logger for event and model saver
    print("####logger")
    logger.set_logger_dir(logdir)

    print("####session_conf")
    session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True,
    ), allow_soft_placement=True)

    print("####train_conf")
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=5)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.Train1.num_epochs,
        steps_per_epoch=hp.Train1.steps_per_epoch,
        session_config=session_conf)

    print("####ckpt")
    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    print("####trainer")
    # NOTE(review): trainer replication uses the configured hp.Train1.num_gpu,
    # not the --gpu list length set on nr_tower above — confirm they agree.
    trainer = SyncMultiGPUTrainerReplicated(hp.Train1.num_gpu)

    print("####launch_train_with_config")
    launch_train_with_config(train_conf, trainer=trainer)
def init_predictor(ckpt_dir):
    """
    Initializes an OfflinePredictor for the 'Net1' Phoneme classifier,
    given a directory of tf-checkpoints.

    :param ckpt_dir: Checkpoint directory.
    :return: OfflinePredictor
    """
    latest = tf.train.latest_checkpoint(ckpt_dir)
    assert latest is not None, "Failed to load checkpoint in '{}'".format(ckpt_dir)

    classifier = Net1()
    config = PredictConfig(
        model=classifier,
        input_names=['x_mfccs'],
        output_names=['net1/ppgs'],
        # Drop the training step counter when restoring for inference.
        session_init=ChainInit([SaverRestore(latest, ignore=['global_step'])]))
    return OfflinePredictor(config)
def train(args, logdir):
    """Train Net1 on preprocessed TIMIT data with periodic eval on the test split.

    :param args: parsed CLI args; reads `args.case` (preprocessing variant
        subdirectory) and `args.ckpt`.
    :param logdir: log/checkpoint directory.
    """
    # model
    model = Net1()

    # dataflow
    # Glob patterns for the preprocessed TIMIT .npz files.
    TIMIT_TRAIN_WAV = 'TIMIT/TRAIN/*/*/*.npz'
    TIMIT_TEST_WAV = 'TIMIT/TEST/*/*/*.npz'
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TRAIN_WAV))
    print(os.path.join(hp.train1.preproc_data_path, args.case, TIMIT_TEST_WAV))
    df = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case,
                                   TIMIT_TRAIN_WAV), hp.train1.batch_size)
    df_test = Net1DataFlow(os.path.join(hp.train1.preproc_data_path, args.case,
                                        TIMIT_TEST_WAV), hp.train1.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=8)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # Run loss/accuracy inference on the test dataflow each epoch.
            InferenceRunner(df_test(n_prefetch=1),
                            ScalarStats(['net1/eval/loss', 'net1/eval/acc'], prefix='')),
        ],
        max_epoch=hp.train1.num_epochs,
        steps_per_epoch=hp.train1.steps_per_epoch,
        #session_config=session_conf
    )
    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)
    # GPU vs CPU is decided by the hyperparameter config, not CLI flags here.
    if hp.default.use_gpu == True:
        os.environ['CUDA_VISIBLE_DEVICES'] = hp.default.gpu_list
        train_conf.nr_tower = len(hp.default.gpu_list.split(','))
        num_gpu = len(hp.default.gpu_list.split(','))
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        # Empty CUDA_VISIBLE_DEVICES hides all GPUs -> CPU training.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        trainer = SimpleTrainer()
    launch_train_with_config(train_conf, trainer=trainer)
def critic_predictor(ctrl, model_dir, vs_name):
    """ Create an OfflinePredictorWithSaver for test-time use. """
    critic = critic_factory(ctrl, is_train=False, vs_name=vs_name)
    predicted_names = ['{}/predicted_accuracy:0'.format(vs_name)]

    # LSTM critics are pinned to the CPU.
    sess_cfg = (tf.ConfigProto(device_count={'GPU': 0})
                if ctrl.critic_type == CriticTypes.LSTM else None)

    cfg = PredictConfig(
        model=critic,
        input_names=critic.input_names,
        output_names=predicted_names,
        session_creator=NewSessionCreator(config=sess_cfg))

    # Restore weights only when a model directory with a checkpoint exists.
    if model_dir:
        latest = tf.train.latest_checkpoint(model_dir)
        logger.info("Loading {} predictor from {}".format(vs_name, latest))
        if latest:
            cfg.session_init = SaverRestore(latest)
    return OfflinePredictorWithSaver(cfg)
def train(args, logdir):
    """Train the Net model, optionally resuming from a checkpoint.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir: log/checkpoint directory.
    """
    # model
    model = Net()

    # dataflow
    df = NetDataFlow(hp.train.data_path, hp.train.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir)

    session_conf = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            allow_growth=True,
        ),)
    # Original (Chinese) comment said "occupy 90% of GPU memory" —
    # NOTE(review): the value is 0.45 (45%), comment and value disagree.
    # Also note session_conf is never passed to TrainConfig below (the
    # session_config line is commented out), so these options are unused.
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            ModelSaver(checkpoint_dir=logdir),
            # TODO EvalCallback()
        ],
        max_epoch=hp.train.num_epochs,
        steps_per_epoch=hp.train.steps_per_epoch,
        # session_config=session_conf
    )
    # Explicit --ckpt selects a specific checkpoint; otherwise the latest.
    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    if ckpt:
        train_conf.session_init = SaverRestore(ckpt)
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SyncMultiGPUTrainerReplicated(hp.train.num_gpu)
    launch_train_with_config(train_conf, trainer=trainer)
def run(args):
    """Train the selfie-to-bitmoji model with LR decay, checkpointing, and eval.

    :param args: parsed CLI args; reads data dirs, batch/thread counts,
        `lr`, `decay`, `resume_lr`, `summary_freq`, `epochs`, `load_path`.
    """
    df_train = s2b_df(args.train_dir_face, args.train_dir_bitmoji,
                      args.batch_size, args.num_threads)
    df_test = s2b_df(args.test_dir_face, args.test_dir_bitmoji,
                     args.batch_size, args.num_threads)

    def update_lr(epoch, cur_lr):
        """ Approximate exponential decay of the learning rate """
        # When resuming, decay from the current LR; otherwise recompute the
        # decayed LR from the base rate and the epoch number.
        if args.resume_lr:
            return cur_lr * args.decay
        else:
            return args.lr * args.decay**epoch

    callbacks = [
        cb.ModelSaver(),
        cb.MinSaver('val-error-top1'),
        cb.HyperParamSetterWithFunc('LR', update_lr),
        # cb.HyperParamSetterWithFunc('Instance_Noise_Stddev', lambda epoch, stddev: stddev * args.decay),
        # cb.HyperParamSetterWithFunc('D_Uncertainty_Threshold', lambda epoch, threshold: threshold * args.decay),
        cb.MergeAllSummaries(period=args.summary_freq),
    ]
    # Scalar losses tracked during inference runs.
    infs = [
        cb.ScalarStats(
            ['L_c', 'L_const', 'L_gan_d', 'L_gan_g', 'L_tid', 'L_tv'])
    ]
    # GPU-only extras: utilization tracking and the test-set inference runner.
    if get_nr_gpu() > 0:
        callbacks.append(cb.GPUUtilizationTracker())
        callbacks.append(cb.InferenceRunner(QueueInput(df_test), infs))

    S2BTrainer(QueueInput(df_train),
               Selfie2BitmojiModel(args)).train_with_defaults(
        callbacks=callbacks,
        max_epoch=args.epochs,
        steps_per_epoch=df_train.size(),
        session_init=SaverRestore(args.load_path))
def export_eval_protobuf_model(checkpoint_dir, model_name, dataset, quant_type,
                               output_file, batch_size):
    """Export an eval-mode model to an optimized protobuf, then measure its accuracy.

    :param checkpoint_dir: directory containing the `checkpoint` to restore.
    :param model_name: model identifier passed to `get_model_func`.
    :param dataset: key into `datasets.DATASETS`.
    :param quant_type: quantization variant passed to `get_model_func`.
    :param output_file: path the compact protobuf graph is written to.
    :param batch_size: evaluation batch size.
    :return: (input_names, output_names, {input_name: shape}) for the exported graph.
    """
    _, test_data, (img_shape, label_shape) = datasets.DATASETS[dataset]()
    model_func, input_spec, output_spec = get_model_func(
        "eval", model_name, quant_type, img_shape, label_shape[0])
    input_names = [i.name for i in input_spec]
    output_names = [o.name for o in output_spec]
    predictor_config = PredictConfig(
        session_init=SaverRestore(checkpoint_dir + "/checkpoint"),
        tower_func=model_func,
        input_signature=input_spec,
        input_names=input_names,
        output_names=output_names,
        create_graph=False)

    print("Exporting optimised protobuf graph...")
    # Freeze Keras into inference mode before exporting.
    K.set_learning_phase(False)
    ModelExporter(predictor_config).export_compact(output_file, optimize=False)

    # Reset Keras state, then rebuild a predictor for the accuracy check.
    K.clear_session()
    pred = OfflinePredictor(predictor_config)

    # remainder=True keeps the final partial batch so all samples are counted.
    test_data = BatchData(test_data, batch_size, remainder=True)
    test_data.reset_state()

    # Compare argmax predictions with one-hot labels batch by batch.
    num_correct = 0
    num_processed = 0
    for img, label in tqdm(test_data):
        num_correct += sum(pred(img)[0].argmax(axis=1) == label.argmax(axis=1))
        num_processed += img.shape[0]

    print("Exported model has accuracy {:.4f}".format(num_correct / num_processed))

    return input_names, output_names, {i.name: i.shape for i in input_spec}
def eval(logdir):
    """Run one evaluation batch through the Net and log its loss summary to `logdir`.

    NOTE: the name shadows the `eval` builtin; kept for caller compatibility.

    :param logdir: checkpoint directory; the summary is written here as well.
    """
    # Load graph
    net = Net()

    # dataflow
    flow = NetDataFlow(hp.test.data_path, hp.test.batch_size)

    restorers = []
    latest = tf.train.latest_checkpoint(logdir)
    if latest:
        restorers.append(SaverRestore(latest))

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    # A single batch is enough: the predictor returns a serialized summary.
    mel, spec, _ = next(flow().get_data())
    (loss_summary,) = predictor(mel, spec)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(loss_summary)
    writer.close()
def train(args, logdir2):
    """Train Net2 from precomputed mel/PPG features, optionally resuming.

    :param args: parsed CLI args; reads `args.ckpt` and `args.gpu`.
    :param logdir2: Net2 checkpoint/log directory.
    """
    # model
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.train2.mel_path, hp.train2.ppgs_path, hp.train2.batch_size)

    session_inits = []
    # Explicit --ckpt selects a specific Net2 checkpoint; otherwise the latest.
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    # Disabled Net1 warm-start (kept for reference):
    '''
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    '''
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    # NOTE(review): replication count comes from hp.train2.num_gpu, not from
    # the --gpu list length set on nr_tower above — confirm they agree.
    trainer = SyncMultiGPUTrainerReplicated(hp.train2.num_gpu)
    print("strated trainer")
    launch_train_with_config(train_conf, trainer=trainer)
"Conv2DBackpropFilter": "MKL", "Conv2DBackpropInput": "MKL" } else: print "using tensorflow convolution" label_map = {} with ops.Graph().as_default() as g: tf.set_random_seed(my_task_index) np.random.seed(my_task_index) with g._kernel_label_map(label_map): with tf.device('/job:worker/task:{}/cpu:0'.format(my_task_index)): with tf.variable_scope(tf.get_variable_scope(), reuse=None): if args.task != 'train': cfg = PredictConfig( model=Model('/job:ps/task:0/cpu:0'), session_init=SaverRestore(args.load), input_var_names=['state'], output_var_names=['logits:0']) if args.task == 'play': play_model(cfg) elif args.task == 'eval': eval_model_multithread(cfg, EVAL_EPISODE) else: nr_towers = args.nr_towers predict_towers = args.nr_predict_towers * [ 0, ] if args.cpu != 1: nr_gpu = get_nr_gpu() if nr_gpu > 1:
DUELING = False else: logger.error("dueling argument must be t or f") if DOUBLE: logger.info("Using Double") if DUELING: logger.info("Using Dueling") assert ENV_NAME logger.info("Environment Name: {}".format(ENV_NAME)) p = get_player() del p # set NUM_ACTIONS if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu cfg = PredictConfig(model=Model(), session_init=SaverRestore(args.load), input_var_names=['state'], output_var_names=['Qvalue']) s_cfg = PredictConfig(model=Model(), session_init=SaverRestore(args.load), input_var_names=['state'], output_var_names=['saliency']) run(cfg, s_cfg, args.output) #run_submission(cfg, args.output, args.episode) #do_submit(args.output, args.api)