def experiment_fn(run_config, params):
    model = Model()
    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    source_vocab = data_loader.load_vocab("source_vocab")
    target_vocab = data_loader.load_vocab("target_vocab")
    Config.data.rev_source_vocab = utils.get_rev_vocab(source_vocab)
    Config.data.rev_target_vocab = utils.get_rev_vocab(target_vocab)
    Config.data.source_vocab_size = len(source_vocab)
    Config.data.target_vocab_size = len(target_vocab)

    train_data, test_data = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = data_loader.get_dataset_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.get_dataset_batch(
        test_data, batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        train_hooks.append(hook.print_variables(
            variables=['train/enc_0'],
            rev_vocab=utils.get_rev_vocab(source_vocab),
            every_n_iter=Config.train.check_hook_n_iter))
        train_hooks.append(hook.print_variables(
            variables=['train/target_0', 'train/pred_0'],
            rev_vocab=utils.get_rev_vocab(target_vocab),
            every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
    return experiment
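# Usage sketch (assumed, not shown in the source): an experiment_fn with the
# (run_config, params) signature is typically driven by
# tf.contrib.learn.learn_runner, which constructs the Experiment and runs the
# requested schedule.
from tensorflow.contrib.learn import learn_runner

def run_experiment(params):
    run_config = tf.contrib.learn.RunConfig(model_dir=Config.train.model_dir)
    learn_runner.run(
        experiment_fn=experiment_fn,  # the function defined above
        run_config=run_config,
        schedule="train_and_evaluate",
        hparams=params)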
def experiment_fn(run_config, params):
    model = Model()
    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    data_loader = DataLoader(
        task_path=Config.data.task_path,
        task_id=Config.data.task_id,
        task_test_id=Config.data.task_id,
        w2v_dim=Config.model.embed_dim,
        use_pretrained=Config.model.use_pretrained)

    data = data_loader.make_train_and_test_set()
    vocab = data_loader.vocab

    # Set data properties.
    Config.data.vocab_size = len(vocab)
    Config.data.max_facts_seq_len = data_loader.max_facts_seq_len
    Config.data.max_fact_count = data_loader.max_fact_count
    Config.data.max_question_seq_len = data_loader.max_question_seq_len

    print("max_facts_seq_len:", data_loader.max_facts_seq_len)
    print("max_fact_count:", data_loader.max_fact_count)
    print("max_question_seq_len:", data_loader.max_question_seq_len)

    train_input_fn, train_input_hook = data_loader.make_batch(
        data["train"], batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.make_batch(
        data["test"], batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        pass
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
    return experiment
def create_experiment(output_dir, data_dir, model_name, train_steps,
                      eval_steps):
    """Create Experiment."""
    hparams = create_hparams(FLAGS.hparams_set, data_dir)
    estimator, input_fns = create_experiment_components(
        hparams=hparams,
        output_dir=output_dir,
        data_dir=data_dir,
        model_name=model_name)
    eval_metrics = metrics.create_evaluation_metrics(
        zip(FLAGS.problems.split("-"), hparams.problem_instances))
    if (hasattr(FLAGS, "autotune") and FLAGS.autotune and
            FLAGS.objective not in eval_metrics):
        raise ValueError("Tuning objective %s not among evaluation metrics %s" %
                         (FLAGS.objective, eval_metrics.keys()))
    train_monitors = []
    eval_hooks = []
    if FLAGS.tfdbg:
        hook = debug.LocalCLIDebugHook()
        train_monitors.append(hook)
        eval_hooks.append(hook)
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=input_fns[tf.contrib.learn.ModeKeys.TRAIN],
        eval_input_fn=input_fns[tf.contrib.learn.ModeKeys.EVAL],
        eval_metrics=eval_metrics,
        train_steps=train_steps,
        eval_steps=eval_steps,
        min_eval_frequency=FLAGS.local_eval_frequency,
        train_monitors=train_monitors,
        eval_hooks=eval_hooks)
def train(args):
    # So I don't frigging forget what caused working models
    save_args(args)

    if args["use_comet"]:
        experiment = Experiment(
            api_key="bRptcjkrwOuba29GcyiNaGDbj", project_name="macgraph")
        experiment.log_multiple_params(args)

    estimator = get_estimator(args)

    if args["use_tf_debug"]:
        hooks = [tf_debug.LocalCLIDebugHook()]
    else:
        hooks = []

    train_spec = tf.estimator.TrainSpec(
        input_fn=gen_input_fn(args, "train"),
        max_steps=args["max_steps"] * 1000 if args["max_steps"] is not None else None,
        hooks=hooks)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=gen_input_fn(args, "eval"), throttle_secs=300)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def run(mode, run_config, params):
    model = Model()
    # ws = tf.estimator.WarmStartSettings(
    #     ckpt_to_initialize_from='logs/pretrained/vgg_16.ckpt',
    #     vars_to_warm_start='vgg_16.*')
    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        # warm_start_from=ws,
        config=run_config)

    if Config.train.debug:
        debug_hooks = tf_debug.LocalCLIDebugHook()
        hooks = [debug_hooks]
    else:
        hooks = []

    loss_hooks = tf.train.LoggingTensorHook(
        {'total_loss': 'loss/add_7',
         'content_loss': 'loss/mul_1',
         'style_loss': 'loss/mul_6:0',
         'step': 'global_step:0'},
        every_n_iter=Config.train.check_hook_n_iter)

    if mode == 'train':
        train_data = data_loader.get_tfrecord(mode, shuffle=True)
        train_input_fn, train_input_hook = data_loader.get_dataset_batch(
            train_data,
            buffer_size=1000,
            batch_size=Config.model.batch_size,
            scope="train")
        hooks.extend([train_input_hook, loss_hooks])
        estimator.train(
            input_fn=train_input_fn,
            hooks=hooks,
            max_steps=Config.train.max_steps)
    else:
        raise ValueError('no %s mode' % mode)
def main(args):
    run_name = FLAGS.run_name or time.strftime('%Y%m%d-%H%M%S', time.localtime())
    output_dir = path.join(FLAGS.run_dir, run_name)

    gin.bind_parameter('SC2EnvironmentConfig.map_name', FLAGS.map)

    gin_files = []
    if path.exists(output_dir):
        print('Resuming', output_dir)
        gin_files.append(path.join(output_dir, 'operative_config-0.gin'))
    if FLAGS.gin_file:
        gin_files += FLAGS.gin_file
    gin.parse_config_files_and_bindings(gin_files, FLAGS.gin_param,
                                        finalize_config=True)

    env = VecEnv(SC2Environment, SC2EnvironmentConfig())
    try:
        agent = A2CAgent(
            env.spec,
            callbacks=RewardSummaryHook(
                summary_output_dir=output_dir, write_summaries_secs=30))
        runner = Runner(env, agent)

        print_parameter_summary()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = FLAGS.gpu_memory_allow_growth
        if FLAGS.gpu_memory_fraction:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction

        hooks = [gin.tf.GinConfigSaverHook(output_dir)]
        if FLAGS.step_limit:
            hooks.append(tf.train.StopAtStepHook(last_step=FLAGS.step_limit))
            hooks.append(LogProgressHook(FLAGS.step_limit))
        if FLAGS.profile:
            hooks.append(tf.train.ProfilerHook(save_secs=60, output_dir=output_dir))
        if FLAGS.debug:
            hooks.append(tf_debug.LocalCLIDebugHook())
        else:
            hooks.append(tf.train.NanTensorHook(agent.loss))

        with tf.train.MonitoredTrainingSession(
                config=config,
                hooks=hooks,
                checkpoint_dir=output_dir,
                save_summaries_secs=30,
                save_checkpoint_secs=FLAGS.save_checkpoint_secs,
                save_checkpoint_steps=FLAGS.save_checkpoint_steps) as sess:
            while not sess.should_stop():
                def step_fn(step_context):
                    runner.train(step_context, 512)

                sess.run_step_fn(step_fn)
    finally:
        env.close()
def run(mode, run_config, params):
    model = Model()
    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    if Config.train.debug:
        debug_hooks = tf_debug.LocalCLIDebugHook()
        hooks = [debug_hooks]
    else:
        hooks = []

    loss_hooks = tf.train.LoggingTensorHook(
        {'loss': 'loss/total_loss:0',
         'step': 'global_step:0'},
        every_n_iter=Config.train.check_hook_n_iter)

    train_data = data_loader.get_tfrecord(shuffle=True)
    train_input_fn, train_input_hook = data_loader.get_dataset_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    hooks.extend([train_input_hook, loss_hooks])

    estimator.train(
        input_fn=train_input_fn, hooks=hooks, max_steps=Config.train.max_steps)
def _create_hooks(mparams, output_dir):
    """Create training and evaluation hooks."""
    # Create training hooks.
    train_hooks = []

    # Create evaluation hooks and eval config.
    eval_hooks = []

    # Write predictions to file.
    prediction_hook = Prediction(mparams, FLAGS.output_dir)
    eval_hooks.append(prediction_hook)

    # Write false predictions to file.
    false_prediction_hook = FalsePrediction(mparams, FLAGS.output_dir)
    eval_hooks.append(false_prediction_hook)

    if FLAGS.schedule == 'continuous_eval':
        eval_output_dir = os.path.join(output_dir, 'eval_continuous')
        eval_hooks.append(tf.contrib.training.SummaryAtEndHook(eval_output_dir))
    elif FLAGS.schedule == 'evaluate':
        # Evaluate until the data are exhausted.
        FLAGS.eval_steps = None

    if FLAGS.debug:
        from tensorflow.python import debug as tf_debug
        debug_hook = tf_debug.LocalCLIDebugHook()
        train_hooks.append(debug_hook)
        eval_hooks.append(debug_hook)

    return train_hooks, eval_hooks
def my_model(features, labels, mode, params):
    """DNN with three hidden layers, and dropout of 0.1 probability."""
    word_embed = np.load(embed_file)
    training = mode == tf.estimator.ModeKeys.TRAIN
    m = Model(params, word_embed, features, labels, training)

    # Compute evaluation metrics.
    metrics = {'accuracy': m.acc}
    tf.summary.scalar('accuracy', m.acc[1])
    f1_hook = F1Hook(m.pred, labels)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode,
            loss=m.loss,
            eval_metric_ops=metrics,
            evaluation_hooks=[f1_hook])

    # Create the training op.
    assert mode == tf.estimator.ModeKeys.TRAIN

    training_hooks = []
    logging_hook = tf.train.LoggingTensorHook(
        {"loss": m.loss, "accuracy": m.acc[0]}, every_n_iter=LOG_N_ITER)
    training_hooks.append(logging_hook)
    if args.tfdbg:
        training_hooks.append(tf_debug.LocalCLIDebugHook())

    return tf.estimator.EstimatorSpec(
        mode, loss=m.loss, train_op=m.train_op, training_hooks=training_hooks)
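# The F1Hook used above is not shown in the source. A plausible skeleton for
# such an evaluation hook, based on the standard tf.train.SessionRunHook API
# (the metric computation is illustrative: micro F1 with label 0 treated as
# the negative class):
import tensorflow as tf

class F1Hook(tf.train.SessionRunHook):
    """Collects predictions and labels during eval and reports F1 at the end."""

    def __init__(self, pred, labels):
        self._pred = pred
        self._labels = labels
        self._preds, self._golds = [], []

    def before_run(self, run_context):
        # Ask the session to additionally fetch predictions and labels.
        return tf.train.SessionRunArgs([self._pred, self._labels])

    def after_run(self, run_context, run_values):
        pred, labels = run_values.results
        self._preds.extend(pred.tolist())
        self._golds.extend(labels.tolist())

    def end(self, session):
        tp = sum(1 for p, g in zip(self._preds, self._golds) if p == g and g != 0)
        pred_pos = sum(1 for p in self._preds if p != 0)
        gold_pos = sum(1 for g in self._golds if g != 0)
        precision = float(tp) / max(pred_pos, 1)
        recall = float(tp) / max(gold_pos, 1)
        f1 = 2 * precision * recall / max(precision + recall, 1e-8)
        print("F1 = %.4f over %d examples" % (f1, len(self._golds)))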
def get_config(args):
    if args.gpu is not None:
        NR_GPU = len(args.gpu.split(','))
        batch_size = int(args.batch_size) // NR_GPU
    else:
        batch_size = int(args.batch_size)

    ds_train = get_data('train', args.multi_scale, batch_size)
    ds_test = get_data('test', False, batch_size)

    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter('learning_rate',
                                  [(0, 1e-4), (3, 2e-4), (6, 3e-4), (10, 6e-4),
                                   (15, 1e-3), (60, 1e-4), (90, 1e-5)]),
        ScheduledHyperParamSetter('unseen_scale',
                                  [(0, cfg.unseen_scale), (cfg.unseen_epochs, 0)]),
        HumanHyperParamSetter('learning_rate'),
    ]
    if cfg.mAP:
        callbacks.append(
            PeriodicTrigger(InferenceRunner(ds_test, [CalMAP(cfg.test_list)]),
                            every_k_epochs=3))
    if args.debug:
        callbacks.append(HookToCallback(tf_debug.LocalCLIDebugHook()))

    return TrainConfig(
        dataflow=ds_train,
        callbacks=callbacks,
        model=Model(args.data_format),
        max_epoch=cfg.max_epoch,
    )
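# Usage sketch (assumption, not from the source): a tensorpack TrainConfig like
# the one above is typically launched with launch_train_with_config; the
# trainer choice here is illustrative (a multi-GPU trainer would match the
# batch-size split above).
from tensorpack import launch_train_with_config, SimpleTrainer

config = get_config(args)
launch_train_with_config(config, SimpleTrainer())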
def create_hooks(use_tfdbg=False,
                 use_dbgprofile=False,
                 dbgprofile_kwargs=None,
                 use_validation_monitor=False,
                 validation_monitor_kwargs=None,
                 use_early_stopping=False,
                 early_stopping_kwargs=None):
    """Create train and eval hooks for Experiment."""
    train_monitors = []
    eval_hooks = []

    if use_tfdbg:
        hook = debug.LocalCLIDebugHook()
        train_monitors.append(hook)
        eval_hooks.append(hook)

    if use_dbgprofile:
        # Recorded traces can be visualized with chrome://tracing/.
        # The memory/tensor lifetime is also profiled.
        tf.logging.info("Using ProfilerHook")
        defaults = dict(save_steps=10, show_dataflow=True, show_memory=True)
        defaults.update(dbgprofile_kwargs)
        train_monitors.append(tf.contrib.hooks.ProfilerHook(**defaults))

    if use_validation_monitor:
        tf.logging.info("Using ValidationMonitor")
        train_monitors.append(
            tf.contrib.learn.monitors.ValidationMonitor(
                hooks=eval_hooks, **validation_monitor_kwargs))

    if use_early_stopping:
        tf.logging.info("Using EarlyStoppingHook")
        hook = metrics_hook.EarlyStoppingHook(**early_stopping_kwargs)
        # Add to both training and eval so that eval aborts as well.
        train_monitors.append(hook)
        eval_hooks.append(hook)

    return train_monitors, eval_hooks
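# Usage sketch (names such as FLAGS, estimator, and the input functions are
# assumed from context, not taken from the source): the returned lists plug
# directly into an Experiment as train_monitors and eval_hooks.
train_monitors, eval_hooks = create_hooks(
    use_tfdbg=FLAGS.tfdbg,
    use_validation_monitor=True,
    validation_monitor_kwargs={"input_fn": eval_input_fn,
                               "every_n_steps": 1000})
experiment = tf.contrib.learn.Experiment(
    estimator=estimator,
    train_input_fn=train_input_fn,
    eval_input_fn=eval_input_fn,
    train_monitors=train_monitors,
    eval_hooks=eval_hooks)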
def create_experiment_fn(output_dir=None):
    """Experiment function."""
    distance_metric = (tf.contrib.factorization.COSINE_DISTANCE
                       if FLAGS.use_cosine_distance
                       else tf.contrib.factorization.SQUARED_EUCLIDEAN_DISTANCE)
    initial_clusters = (tf.contrib.factorization.KMEANS_PLUS_PLUS_INIT
                        if FLAGS.use_kmeans_plus_plus
                        else tf.contrib.factorization.RANDOM_INIT)

    # Create the estimator.
    kmeans = kmeans_lib.KMeansClustering(
        FLAGS.num_clusters,
        model_dir=output_dir,
        initial_clusters=initial_clusters,
        distance_metric=distance_metric,
        use_mini_batch=True,
        relative_tolerance=FLAGS.relative_tolerance,
        config=tf.contrib.learn.RunConfig(
            save_checkpoints_secs=FLAGS.save_checkpoints_secs))

    train_monitors = []
    if FLAGS.debug:
        train_monitors.append(tf_debug.LocalCLIDebugHook())

    return tf.contrib.learn.Experiment(
        estimator=kmeans,
        train_steps=FLAGS.num_train_steps,
        eval_steps=1,
        eval_input_fn=_input_fn,
        train_input_fn=_input_fn,
        train_monitors=train_monitors,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                _predict_input_fn, exports_to_keep=5)
        ])
def create_hooks(use_tfdbg=False,
                 use_dbgprofile=False,
                 dbgprofile_kwargs=None,
                 use_validation_monitor=False,
                 validation_monitor_kwargs=None):
    """Create train and eval hooks for Experiment."""
    train_monitors = []
    eval_hooks = []

    if use_tfdbg:
        hook = debug.LocalCLIDebugHook()
        train_monitors.append(hook)
        eval_hooks.append(hook)

    if use_dbgprofile:
        # Recorded traces can be visualized with chrome://tracing/.
        # The memory/tensor lifetime is also profiled.
        defaults = dict(save_steps=10, show_dataflow=True, show_memory=True)
        defaults.update(dbgprofile_kwargs)
        train_monitors.append(tf.contrib.hooks.ProfilerHook(**defaults))

    if use_validation_monitor:
        train_monitors.append(
            tf.contrib.learn.monitors.ValidationMonitor(
                hooks=eval_hooks, **validation_monitor_kwargs))

    return train_monitors, eval_hooks
def main(unused_argv):
    hparams = udc_hparams.create_hparams()
    model_fn = udc_model.create_model_fn(hparams, model_impl=dual_encoder_model)
    estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir=MODEL_DIR)

    input_fn_train = udc_inputs.create_input_fn(
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        input_files=[TRAIN_FILE],
        batch_size=hparams.batch_size,
        num_epochs=FLAGS.num_epochs)
    input_fn_eval = udc_inputs.create_input_fn(
        mode=tf.contrib.learn.ModeKeys.EVAL,
        input_files=[VALIDATION_FILE],
        batch_size=hparams.eval_batch_size,
        num_epochs=1)

    eval_metrics = udc_metrics.create_evaluation_metrics()
    eval_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_eval,
        every_n_steps=FLAGS.eval_every,
        metrics=eval_metrics)

    # Attach the tfdbg CLI hook as a training monitor alongside the
    # validation monitor.
    dbg_hook = tfdbg.LocalCLIDebugHook()
    estimator.fit(
        input_fn=input_fn_train,
        steps=FLAGS.num_steps,
        monitors=[eval_monitor, dbg_hook])
def create_experiment(output_dir, data_dir, model_name, train_steps, eval_steps): """Create Experiment.""" hparams = create_hparams(FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) if FLAGS.worker_id == 0 and FLAGS.schedule in ["local_run", "train"]: save_metadata(output_dir, hparams) estimator, input_fns = create_experiment_components(hparams=hparams, output_dir=output_dir, data_dir=data_dir, model_name=model_name) train_monitors = [] eval_hooks = [] if FLAGS.tfdbg: hook = debug.LocalCLIDebugHook() train_monitors.append(hook) eval_hooks.append(hook) return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN], eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL], train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=FLAGS.local_eval_frequency, train_monitors=train_monitors, eval_hooks=eval_hooks)
def experiment_fn(run_config, params):
    model = Model()  # TODO: the core model
    estimator = tf.estimator.Estimator(  # TODO: the high-level Estimator API
        model_fn=model.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    # TODO: these steps could be done entirely with the high-level tf.data API.
    train_data, test_data = data_loader.make_train_and_test_set()

    # TODO: hook creation; worth studying carefully.
    train_input_fn, train_input_hook = data_loader.make_batch(
        train_data, batch_size=Config.model.batch_size, scope="train")
    test_input_fn, test_input_hook = data_loader.make_batch(
        test_data, batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        train_hooks.append(
            hook.print_variables(
                variables=['train/input_0'],
                rev_vocab=get_rev_vocab(vocab),
                every_n_iter=Config.train.check_hook_n_iter))
        train_hooks.append(
            hook.print_target(
                variables=['train/target_0', 'train/pred_0'],
                every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks)
    return experiment
def train(n_epochs, config):
    train_tfrecord = "train.tfrecord"
    test_tfrecord = "test.tfrecord"
    if not os.path.exists(train_tfrecord):
        questions, answers, word2index = datas.load_dataset(
            dir=".", num_words=VOCABULARY_SIZE)
        size = len(questions)
        fractions = [0.8, 0.2, 0.0]
        # fractions = [0.1, 0.02, 0.1]
        l1 = int(fractions[0] * size)
        l2 = int((fractions[0] + fractions[1]) * size)
        TFgenerator.write_tfrecord(train_tfrecord, questions[:l1], answers[:l1])
        TFgenerator.write_tfrecord(test_tfrecord, questions[l1:l2], answers[l1:l2])

    train_iter = TFgenerator.load_datasets(train_tfrecord, n_epochs, K)
    test_iter = TFgenerator.load_datasets(test_tfrecord, 1, K)

    checkpointing_config = tf.estimator.RunConfig(
        save_checkpoints_secs=20 * 60,  # Save checkpoints every 20 minutes.
        keep_checkpoint_max=10,  # Retain the 10 most recent checkpoints.
        save_checkpoints_steps=None,
        save_summary_steps=5)
    dan_classifier = tf.estimator.Estimator(
        model_fn=qa_model_fn,
        model_dir="/tmp/DAN_model",
        params={},
        config=checkpointing_config)

    tensors_to_log = {"predictions": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=100)

    from tensorflow.python import debug as tf_debug
    # The hook list must contain SessionRunHooks, not the raw tensor dict.
    hooks = [logging_hook, tf_debug.LocalCLIDebugHook()]

    dan_classifier.train(input_fn=lambda: train_iter, hooks=hooks)
    print(dan_classifier.evaluate(input_fn=lambda: test_iter, hooks=hooks))

    def serving_input_receiver_fn():
        features = {
            'question': tf.VarLenFeature(tf.int64),
            'answer': tf.VarLenFeature(tf.int64),
            'q_len': tf.FixedLenFeature([], tf.int64),
            'a_len': tf.FixedLenFeature([], tf.int64)
        }
        serialized_tf_example = tf.placeholder(
            dtype=tf.string, name='input_example_tensor')
        receiver_tensors = {'examples': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, features)
        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

    dan_classifier.export_savedmodel(".", serving_input_receiver_fn)
def main(unused_argv):
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if FLAGS.clean_model_dir:
        shutil.rmtree(FLAGS.model_dir, ignore_errors=True)

    # Set up a RunConfig to only save checkpoints once per training cycle.
    run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_plus_model_fn,
        model_dir=FLAGS.model_dir,
        config=run_config,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': FLAGS.batch_size,
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': FLAGS.pre_trained_model,
            'batch_norm_decay': _BATCH_NORM_DECAY,
            'num_classes': _NUM_CLASSES,
            'tensorboard_images_max_outputs': FLAGS.tensorboard_images_max_outputs,
            'weight_decay': FLAGS.weight_decay,
            'learning_rate_policy': FLAGS.learning_rate_policy,
            'num_train': _NUM_IMAGES['train'],
            'initial_learning_rate': FLAGS.initial_learning_rate,
            'max_iter': FLAGS.max_iter,
            'end_learning_rate': FLAGS.end_learning_rate,
            'power': _POWER,
            'momentum': _MOMENTUM,
            'freeze_batch_norm': FLAGS.freeze_batch_norm,
            'initial_global_step': FLAGS.initial_global_step
        })

    for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
        tensors_to_log = {
            'learning_rate': 'learning_rate',
            'cross_entropy': 'cross_entropy',
            'train_px_accuracy': 'train_px_accuracy',
            'train_mean_iou': 'train_mean_iou',
        }
        logging_hook = tf.train.LoggingTensorHook(
            tensors=tensors_to_log, every_n_iter=1)
        train_hooks = [logging_hook]
        eval_hooks = None

        if FLAGS.debug:
            debug_hook = tf_debug.LocalCLIDebugHook()
            train_hooks.append(debug_hook)
            eval_hooks = [debug_hook]

        tf.logging.info("Start training.")
        model.train(
            input_fn=lambda: input_fn(True, FLAGS.data_dir, FLAGS.batch_size,
                                      FLAGS.epochs_per_eval),
            hooks=train_hooks,
            # steps=1  # For debugging
        )
def main(unused_argv):
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': 1,  # Batch size must be 1 because the images' sizes may differ.
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [os.path.join(FLAGS.data_dir, filename) for filename in examples]

    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for pred_dict, image_path in zip(predictions, image_files):
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_filename = image_basename + '_mask.png'
        path_to_output = os.path.join(output_dir, output_filename)
        img = Image.open(image_path)

        print("generating:", path_to_output)
        mask = pred_dict['decoded_labels']
        mask = Image.fromarray(mask)

        crf_result = crf(img, pred_dict['probabilities'], 10)
        crf_argmax = np.expand_dims(
            np.expand_dims(np.argmax(crf_result, axis=2), axis=0), axis=3)
        crf_decode = np.squeeze(
            preprocessing.decode_labels(crf_argmax)).transpose((1, 0, 2))

        cmap = plt.get_cmap('bwr')
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
        ax1.imshow(mask)
        ax1.set_title('Segmentation with Deeplab')
        ax2.imshow(crf_decode, cmap=cmap)
        ax2.set_title('Segmentation with CRF post-processing _ 1')
        f.savefig(path_to_output, bbox_inches='tight')
def main(unused_arg):
    hparams = hp.create_hparams()
    model_fn = model.create_model_fn(hparams)
    estimator = tf.contrib.learn.Estimator(
        model_fn=model_fn,
        config=tf.contrib.learn.RunConfig(
            save_checkpoints_steps=FLAGS.eval_every,
            save_summary_steps=10000,
            log_step_count_steps=10000,
            model_dir=MODEL_DIR))

    input_fn_train = input.create_input_fn(
        input_files=[TRAIN_FILE_PATH],
        batch_size=hparams.batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs)

    monitors_list = []

    input_fn_validation = input.create_input_fn(
        [VALIDATION_FILE_PATH], tf.contrib.learn.ModeKeys.EVAL,
        hparams.eval_batch_size, 1)
    validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_validation,
        every_n_steps=FLAGS.eval_every,
        metrics=metrics.create_evaluation_metrics('validation'))
    monitors_list.append(validation_monitor)

    input_fn_test = input.create_input_fn(
        [TEST_FILE_PATH], tf.contrib.learn.ModeKeys.EVAL,
        hparams.eval_batch_size, 1)
    test_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_test,
        every_n_steps=FLAGS.eval_every,
        metrics=metrics.create_evaluation_metrics('test'))
    monitors_list.append(test_monitor)

    if FLAGS.debug:
        debugger = tf_debug.LocalCLIDebugHook()
        monitors_list.append(debugger)

    input_fn_train_eval = input.create_input_fn(
        [TRAIN_FILE_PATH], tf.contrib.learn.ModeKeys.EVAL, hparams.batch_size, 1)
    train_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=input_fn_train_eval,
        every_n_steps=FLAGS.train_eval_every,
        metrics={
            'train_accuracy': metrics.create_metric_spec(
                tf.contrib.metrics.streaming_accuracy, 'predictions', None)
        })
    monitors_list.append(train_monitor)

    estimator.fit(input_fn=input_fn_train, steps=None, monitors=monitors_list)
    hp.write_hparams_to_file(hparams, MODEL_DIR)
def add_debug_hooks(hooks):
    if FLAGS.debug_tb:
        debug_hook = tf_debug.TensorBoardDebugHook("pawel-workstation:8080")
        hooks.append(debug_hook)
    elif FLAGS.debug_cli:
        debug_hook = tf_debug.LocalCLIDebugHook()
        debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        hooks.append(debug_hook)
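# Usage sketch (estimator and train_input_fn are assumed names, not from the
# source): the helper mutates the hook list in place, so it is called before
# training starts.
hooks = [tf.train.StopAtStepHook(last_step=100000)]
add_debug_hooks(hooks)
estimator.train(input_fn=train_input_fn, hooks=hooks)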
def train_one_epoch(self, nn, steps=None):  # pylint: disable=invalid-name
    '''Train for one epoch.'''
    mode = utils.TRAIN
    tfconf = self.config['solver']['run_config']
    nn.train(
        input_fn=self.input_fn(mode),
        steps=steps,
        hooks=[tf_debug.LocalCLIDebugHook()] if tfconf['debug'] else None)
def __init__(self, *args, **kwargs):
    """
    Args:
        args, kwargs: arguments to create `tfdbg.LocalCLIDebugHook`.
            Refer to tensorflow documentation for details.
    """
    from tensorflow.python import debug as tfdbg
    # Forward the arguments to the hook, as the docstring promises.
    super(TFLocalCLIDebugHook, self).__init__(
        tfdbg.LocalCLIDebugHook(*args, **kwargs))
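# Usage sketch (assumption: the surrounding class adapts the hook into a
# training callback, as in tensorpack's HookToCallback pattern). Constructor
# arguments are forwarded to tfdbg.LocalCLIDebugHook; "curses" is its default
# terminal UI.
callbacks = [TFLocalCLIDebugHook(ui_type="curses")]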
def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams,
                      run_config):
    """Create Experiment."""
    estimator, input_fns = create_experiment_components(
        data_dir=data_dir,
        model_name=model_name,
        hparams=hparams,
        run_config=run_config)  # input_fns: input functions keyed by mode

    train_monitors = []
    eval_hooks = []
    if FLAGS.tfdbg:
        hook = debug.LocalCLIDebugHook()
        train_monitors.append(hook)
        eval_hooks.append(hook)
    if FLAGS.dbgprofile:
        # Recorded traces can be visualized with chrome://tracing/.
        # The memory/tensor lifetime is also profiled.
        train_monitors.append(
            tf.contrib.hooks.ProfilerHook(
                save_steps=10,
                output_dir=run_config.model_dir,
                show_dataflow=True,
                show_memory=True,
            ))
    if FLAGS.schedule == "train_and_evaluate":
        if FLAGS.local_eval_frequency:
            train_monitors.append(
                tf.contrib.learn.monitors.ValidationMonitor(
                    input_fn=input_fns[tf.estimator.ModeKeys.EVAL],
                    eval_steps=eval_steps,
                    every_n_steps=FLAGS.local_eval_frequency,
                    hooks=eval_hooks,
                    early_stopping_rounds=FLAGS.eval_early_stopping_steps,
                    early_stopping_metric=FLAGS.eval_early_stopping_metric,
                    early_stopping_metric_minimize=FLAGS.eval_early_stopping_metric_minimize))

    optional_kwargs = {}
    if FLAGS.export_saved_model:
        assert len(hparams.problem_instances) == 1
        problem = hparams.problem_instances[0]
        optional_kwargs["export_strategies"] = [
            make_export_strategy(problem, hparams)
        ]

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN],
        eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL],
        train_steps=train_steps,
        eval_steps=eval_steps,
        train_monitors=train_monitors,
        eval_hooks=eval_hooks,
        min_eval_frequency=FLAGS.local_eval_frequency,
        train_steps_per_iteration=FLAGS.local_eval_frequency,
        eval_delay_secs=0,
        **optional_kwargs)
def main(unused_argv):
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': 1,  # Batch size must be 1 because the images' sizes may differ.
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [
        os.path.join(FLAGS.data_dir, filename) + '.jpg' for filename in examples
    ]

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    CROP_HEIGHT = 500
    CROP_WIDTH = 500

    for img in image_files:
        width, height = Image.open(img).size
        full_mask = Image.new('RGBA', (width, height))
        print('SIZE', width, height)
        for i in range(0, int(math.ceil(width / CROP_WIDTH))):
            for j in range(0, int(math.ceil(height / CROP_HEIGHT))):
                CROP = [
                    j * CROP_HEIGHT,
                    i * CROP_WIDTH,
                    CROP_HEIGHT if (j + 1) * CROP_HEIGHT <= height else height % CROP_HEIGHT,
                    CROP_WIDTH if (i + 1) * CROP_WIDTH <= width else width % CROP_WIDTH
                ]
                print(CROP)
                predictions = model.predict(
                    input_fn=lambda: preprocessing.infer_input_fn([img], CROP),
                    hooks=pred_hooks)
                mask = next(predictions)['decoded_labels']
                mask = Image.fromarray(mask)
                full_mask.paste(mask, (CROP[1], CROP[0]))
        print('saving')
        image_basename = img.split('.')[0]
        full_mask.save(image_basename + '_mask.png')
def main(_):
    # Generate some fake Iris data. This is fine here because the example is
    # about how to use the debugger, not how to solve the Iris classification
    # problem with machine learning.
    def training_input_fn():
        return ({"features": tf.random_normal([128, 4])},
                tf.random_uniform([128], minval=0, maxval=3, dtype=tf.int32))

    def test_input_fn():
        return ({"features": tf.random_normal([32, 4])},
                tf.random_uniform([32], minval=0, maxval=3, dtype=tf.int32))

    feature_columns = [tf.feature_column.numeric_column("features", shape=(4,))]

    # Build a 3-layer DNN with 10, 20, and 10 units respectively.
    model_dir = FLAGS.model_dir or tempfile.mkdtemp(prefix="debug_tflearn_iris_")
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir=model_dir)

    if FLAGS.debug and FLAGS.tensorboard_debug_address:
        raise ValueError(
            "The --debug and --tensorboard_debug_address flags are mutually "
            "exclusive.")
    hooks = []
    if FLAGS.debug:
        config_file_path = (tempfile.mktemp(".tfdbg_config")
                            if FLAGS.use_random_config_path else None)
        hooks.append(
            tf_debug.LocalCLIDebugHook(
                ui_type=FLAGS.ui_type,
                dump_root=FLAGS.dump_root,
                config_file_path=config_file_path))
    elif FLAGS.tensorboard_debug_address:
        hooks.append(tf_debug.TensorBoardDebugHook(FLAGS.tensorboard_debug_address))

    # Train the model, using the tfdbg hook.
    classifier.train(training_input_fn, steps=FLAGS.train_steps, hooks=hooks)

    # Evaluate accuracy, using the tfdbg hook.
    accuracy_score = classifier.evaluate(
        test_input_fn, steps=FLAGS.eval_steps, hooks=hooks)["accuracy"]
    print("After training %d steps, Accuracy = %f" %
          (FLAGS.train_steps, accuracy_score))

    # Make predictions, using the tfdbg hook.
    predict_results = classifier.predict(test_input_fn, hooks=hooks)
    print("A prediction result: %s" % next(predict_results))
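# Side note (not in the source): LocalCLIDebugHook also accepts tensor filters,
# as the add_debug_hooks snippet above shows; a registered filter lets you jump
# to the first problematic tensor from the tfdbg UI with `run -f has_inf_or_nan`.
debug_hook = tf_debug.LocalCLIDebugHook()
debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)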
def experiment_fn(run_config, params):
    # Define the estimator first.
    conversation = Conversation()
    estimator = tf.estimator.Estimator(
        model_fn=conversation.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)

    # load_vocab returns a dict.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    # Define the training data.
    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()

    train_input_fn, train_input_hook = data_loader.make_batch(
        (train_X, train_y), batch_size=Config.model.batch_size)
    test_input_fn, test_input_hook = data_loader.make_batch(
        (test_X, test_y), batch_size=Config.model.batch_size, scope="test")

    train_hooks = [train_input_hook]
    if Config.train.print_verbose:
        train_hooks.append(
            hook.print_variables(
                variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
                rev_vocab=utils.get_rev_vocab(vocab),
                every_n_iter=Config.train.check_hook_n_iter))
    if Config.train.debug:
        train_hooks.append(tf_debug.LocalCLIDebugHook())

    eval_hooks = [test_input_hook]
    if Config.train.debug:
        eval_hooks.append(tf_debug.LocalCLIDebugHook())

    # Define the experiment.
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=0)
    return experiment
def train_model():
    from tensorflow.python import debug as tf_debug
    debug_hook = tf_debug.LocalCLIDebugHook()
    classifier.train(
        input_fn=lambda: fe.train_input_fn(FLAGS.train_data, FLAGS.batch_size),
        steps=1000,
        hooks=[debug_hook])
def main(unused_argv):
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_plus_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': 1,  # Batch size must be 1 because the images' sizes may differ.
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })
    print("We are after estimator " + str(time.time() - startTime))

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [
        os.path.join(FLAGS.data_dir, filename + ".jpg") for filename in examples
    ]

    while True:
        predictions = model.predict(
            input_fn=lambda: preprocessing.eval_input_fn(image_files),
            hooks=pred_hooks)
        # predictions = model.predict(
        #     input_fn=lambda: iter(cam),
        #     hooks=pred_hooks)
        print("We are after prediction " + str(time.time() - startTime))

        output_dir = FLAGS.output_dir
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for pred_dict, image_path in zip(predictions, image_files):
            # ret, frame = cap.read()
            # cv2.imshow("camera", frame)
            # time.sleep(1)
            # cv2.waitKey(0)
            # cv2.imwrite("./img.jpg", frame)
            print(str(time.time() - startTime))
            image_basename = os.path.splitext(os.path.basename(image_path))[0]
            output_filename = image_basename + '_mask.png'
            path_to_output = os.path.join(output_dir, output_filename)
            print("generating:", path_to_output)

            mask = pred_dict['decoded_labels']
            mask = Image.fromarray(mask)
            plt.axis('off')
            plt.imshow(mask)
            # plt.show()
            plt.savefig(path_to_output, bbox_inches='tight')
def main(unused_argv):
    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model_dir = './models/model'  # FLAGS.model_dir
    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_model_fn,
        model_dir=model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            'batch_size': 1,  # Batch size must be 1 because the images' sizes may differ.
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    # examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    # image_files = [os.path.join(FLAGS.data_dir, filename) for filename in examples]
    img_folder = cfg.values['img_folder']
    image_files = [os.path.join(img_folder, f) for f in os.listdir(img_folder)]
    # image_files = random.sample(image_files, 1)

    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for pred_dict, image_path in zip(predictions, image_files):
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        # print(pred_dict['classes'].shape)
        output_filename = image_basename + '.png'
        path_to_output = os.path.join(output_dir, output_filename)
        print("generating:", path_to_output)

        # mask = merge_colors(pred_dict['decoded_labels'])
        label_image = np.squeeze(pred_dict['classes'], axis=2)
        label_image[label_image > 0] = cfg.values['bear_label']
        # print([np.max(row) for row in label_image])
        mask = Image.fromarray(label_image, mode='L')
        mask.save(path_to_output)