def main(unused_argv):
  """Trains or evaluates a detection model configured via CK_* env vars.

  Reads configuration from environment variables instead of absl flags, then
  either evaluates an existing checkpoint directory or runs train-and-evaluate.
  """

  def _env_bool(var_name):
    # os.getenv returns the raw *string* when the variable is set, so values
    # such as "0", "no" or "False" would be truthy if used directly.
    return os.getenv(var_name, '').strip().lower() in ('1', 'true', 'yes', 'on')

  params = {}
  params["PIPELINE_FILE"] = 'pipeline.config'
  params["MODEL_DIR"] = 'model.ckpt'  # output directory
  params["NUM_STEPS"] = int(os.getenv("CK_NUM_STEPS", '1'))
  # BUG FIX: these two were raw strings before (os.getenv(..., False/None)),
  # so e.g. CK_EVAL_TRAIN_DATA="False" behaved as True.
  params["EVAL_TRAIN_DATA"] = _env_bool("CK_EVAL_TRAIN_DATA")
  params["SAMPLE_1_OF_N_EVAL_EXAMPLES"] = int(
      os.getenv("CK_SAMPLE_1_OF_N_EVAL_EXAMPLES", 1))
  params["SAMPLE_1_OF_N_TRAIN_EXAMPLES"] = int(
      os.getenv("CK_SAMPLE_1_OF_N_TRAIN_EXAMPLES", 5))
  params["HYPERPARAMS_OVERRIDE"] = os.getenv("CK_HYPERPARAMS_OVERRIDE", None)
  params["CHECKPOINT_DIR"] = os.getenv("CK_CHECKPOINT_DIR", None)
  params["RUN_ONCE"] = _env_bool("CK_RUN_ONCE")

  config = tf.estimator.RunConfig(params["MODEL_DIR"])
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(params["HYPERPARAMS_OVERRIDE"]),
      pipeline_config_path=params["PIPELINE_FILE"],
      train_steps=params["NUM_STEPS"],
      sample_1_of_n_eval_examples=params["SAMPLE_1_OF_N_EVAL_EXAMPLES"],
      sample_1_of_n_eval_on_train_examples=(
          params["SAMPLE_1_OF_N_TRAIN_EXAMPLES"]))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']

  if params["CHECKPOINT_DIR"]:
    # Evaluation-only mode against an existing checkpoint directory.
    if params["EVAL_TRAIN_DATA"]:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if params["RUN_ONCE"]:
      estimator.evaluate(input_fn,
                         steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             params["CHECKPOINT_DIR"]))
    else:
      model_lib.continuous_eval(estimator, params["CHECKPOINT_DIR"], input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Entry point: evaluate existing checkpoints, or train-and-evaluate."""
  for required in ("model_dir", "pipeline_config_path"):
    flags.mark_flag_as_required(required)

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  bundle = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples),
  )
  estimator = bundle["estimator"]
  train_steps = bundle["train_steps"]

  if FLAGS.checkpoint_dir:
    # Evaluation-only mode against an existing checkpoint directory.
    if FLAGS.eval_training_data:
      name, input_fn = "training_data", bundle["eval_on_train_input_fn"]
    else:
      # Only the first eval input is evaluated.
      name, input_fn = "validation_data", bundle["eval_input_fns"][0]
    if FLAGS.run_once:
      latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
      estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
    else:
      model_lib.continuous_eval(
          estimator,
          FLAGS.checkpoint_dir,
          input_fn,
          train_steps,
          name,
          FLAGS.max_eval_retries,
      )
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        bundle["train_input_fn"],
        bundle["eval_input_fns"],
        bundle["eval_on_train_input_fn"],
        bundle["predict_input_fn"],
        train_steps,
        eval_on_train_data=False,
    )
    # Only a single EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Drives TPU training ('train' mode) or continuous evaluation ('eval')."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  master = resolver.get_master()
  run_config = tf.contrib.tpu.RunConfig(
      master=master,
      evaluation_master=master,
      model_dir=FLAGS.model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))

  # Forward an explicit train batch size only when the flag was supplied.
  extra = {}
  if FLAGS.train_batch_size:
    extra['batch_size'] = FLAGS.train_batch_size

  parts = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples),
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      save_final_config=FLAGS.mode == 'train',
      **extra)

  estimator = parts['estimator']
  train_steps = parts['train_steps']

  if FLAGS.mode == 'train':
    estimator.train(input_fn=parts['train_input_fn'], max_steps=train_steps)

  # Continuously evaluate checkpoints appearing in model_dir.
  if FLAGS.mode == 'eval':
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = parts['eval_on_train_input_fn']
    else:
      # Only a single eval input is currently supported.
      name = 'validation_data'
      input_fn = parts['eval_input_fns'][0]
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                              train_steps, name)
def main(unused_argv):
  """TPU entry point: trains in 'train' mode, continuously evaluates in 'eval'."""
  for flag_name in ('model_dir', 'pipeline_config_path'):
    flags.mark_flag_as_required(flag_name)

  cluster = tf.contrib.cluster_resolver.TPUClusterResolver(
      tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  grpc_url = cluster.get_master()
  tpu_settings = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=FLAGS.iterations_per_loop,
      num_shards=FLAGS.num_shards)
  run_config = tf.contrib.tpu.RunConfig(
      master=grpc_url,
      evaluation_master=grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_settings)

  # Only pass batch_size through when the flag is set.
  optional = (
      {'batch_size': FLAGS.train_batch_size} if FLAGS.train_batch_size else {})

  outputs = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples),
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      save_final_config=FLAGS.mode == 'train',
      **optional)

  estimator = outputs['estimator']
  train_steps = outputs['train_steps']

  if FLAGS.mode == 'train':
    estimator.train(input_fn=outputs['train_input_fn'], max_steps=train_steps)

  # 'eval' mode: evaluate each new checkpoint as it appears.
  if FLAGS.mode == 'eval':
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', outputs['eval_on_train_input_fn']
    else:
      # Only a single eval input is currently supported.
      name, input_fn = 'validation_data', outputs['eval_input_fns'][0]
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                              train_steps, name)
def train_and_eval(rpn_type=None, filter_fn_arg=None, replace_rpn_arg=None,
                   number_of_stages=None):
  """Runs training/evaluation with optional RPN customisation arguments.

  Args:
    rpn_type: optional RPN variant forwarded to create_estimator_and_inputs.
    filter_fn_arg: optional filter-function argument, forwarded unchanged.
    replace_rpn_arg: optional RPN-replacement argument, forwarded unchanged.
    number_of_stages: optional stage count, forwarded unchanged.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
                                      save_checkpoints_steps=10000)
  pieces = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples),
      rpn_type=rpn_type,
      filter_fn_arg=filter_fn_arg,
      replace_rpn_arg=replace_rpn_arg,
      number_of_stages=number_of_stages)

  estimator = pieces['estimator']
  train_steps = pieces['train_steps']

  if not FLAGS.checkpoint_dir:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        pieces['train_input_fn'],
        pieces['eval_input_fns'],
        pieces['eval_on_train_input_fn'],
        pieces['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    # Only one EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    return

  # Evaluation-only path against FLAGS.checkpoint_dir.
  if FLAGS.eval_training_data:
    name, input_fn = 'training_data', pieces['eval_on_train_input_fn']
  else:
    # Only the first eval input is evaluated.
    name, input_fn = 'validation_data', pieces['eval_input_fns'][0]
  if FLAGS.run_once:
    estimator.evaluate(
        input_fn, steps=None,
        checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
  else:
    model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                              train_steps, name)
def main(unused_argv):
  """Train-and-evaluate entry point with checkpoint/eval cadence flags."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  run_config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps)

  pieces = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = pieces['estimator']
  train_steps = pieces['train_steps']

  if FLAGS.checkpoint_dir:
    # Evaluation-only path.
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', pieces['eval_on_train_input_fn']
    else:
      # Only the first eval input is evaluated.
      name, input_fn = 'validation_data', pieces['eval_input_fns'][0]
    if FLAGS.run_once:
      estimator.evaluate(
          input_fn, steps=None,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name, FLAGS.max_eval_retries)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        pieces['train_input_fn'],
        pieces['eval_input_fns'],
        pieces['eval_on_train_input_fn'],
        pieces['predict_input_fn'],
        train_steps,
        num_eval_steps=FLAGS.num_eval_steps,
        eval_throttle_secs=FLAGS.eval_throttle_secs,
        eval_start_delay_secs=FLAGS.eval_start_delay_secs,
        eval_on_train_data=False)
    # Only a single EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Print-instrumented train/eval driver using the eval_steps-era API."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  bundle = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps)
  estimator = bundle['estimator']
  train_steps = bundle['train_steps']
  eval_steps = bundle['eval_steps']
  print('train_and_eval_dict: ' + str(bundle))

  if FLAGS.checkpoint_dir:
    print('enter into "FLAGS.checkpoint_dir:"')
    if FLAGS.eval_training_data:
      print('enter into "if FLAGS.eval_training_data:"')
      name = 'training_data'
      input_fn = bundle['eval_on_train_input_fn']
    else:
      print('enter into else of "if FLAGS.eval_training_data:"')
      name = 'validation_data'
      input_fn = bundle['eval_input_fn']
    if FLAGS.run_once:
      print('enter into "if FLAGS.run_once:"')
      estimator.evaluate(
          input_fn, eval_steps,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      print('enter into else of "if FLAGS.run_once:"')
      model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                                eval_steps, train_steps, name)
  else:
    print('enter into else of "FLAGS.checkpoint_dir:"')
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        bundle['train_input_fn'],
        bundle['eval_input_fn'],
        bundle['eval_on_train_input_fn'],
        bundle['predict_input_fn'],
        train_steps,
        eval_steps,
        eval_on_train_data=False)
    # Only a single EvalSpec is currently supported.
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Trains/evaluates a detection model; INFO logging enabled up front.

  Fix: `tf.estimator.Estimator.evaluate` has no `num_eval_steps` keyword —
  the original call raised TypeError in --run_once mode. The parameter is
  named `steps`.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  tf.logging.set_verbosity(tf.logging.INFO)

  # NOTE(review): alternative GPU-memory-capped session config, kept for
  # reference:
  # session_config = tf.ConfigProto()
  # session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
  # session_config.gpu_options.allow_growth = True
  # config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir, session_config=session_config)
  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn,
                         steps=None,  # evaluate the full eval dataset
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Trains or evaluates with frequent logging and checkpoints every 300 steps."""
  # Both flags are mandatory for any run.
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # Checkpoint/logging cadence for the run.
  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
                                      log_step_count_steps=10,
                                      save_checkpoints_steps=300)
  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      # hparams_overrides is the hook for hyper-parameter tuning.
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))

  estimator = built['estimator']
  train_steps = built['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', built['eval_on_train_input_fn']
    else:
      # Only the first eval input is evaluated.
      name, input_fn = 'validation_data', built['eval_input_fns'][0]
    if FLAGS.run_once:
      estimator.evaluate(
          input_fn,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        built['train_input_fn'],
        built['eval_input_fns'],
        built['eval_on_train_input_fn'],
        built['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    # Only a single EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Trains/evaluates using hard-coded Windows paths for pipeline and ckpts.

  Fix: `Estimator.evaluate` takes `steps`, not `num_eval_steps`; the original
  --run_once call raised TypeError.
  """
  # NOTE(review): machine-specific absolute paths; consider flags instead.
  pipeline_config_path = "D:\\project3_faster_rcnn\\models-master\\research\\hat_dataset\\hat_resnet50_config.config"
  model_dir = "D:\\project3_faster_rcnn\\models-master\\research\\hat_dataset\\checkpoints\\"
  config = tf.estimator.RunConfig(model_dir=model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']

  # Cap GPU memory at 90% and allow growth for the Keras session.
  # (Renamed from `config` to avoid shadowing the RunConfig above.)
  sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
      per_process_gpu_memory_fraction=0.9))
  sess_config.gpu_options.allow_growth = True
  session = tf.Session(config=sess_config)
  set_session(session)

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn,
                         steps=None,  # evaluate the full eval dataset
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Template driver with placeholder paths for RunConfig and the pipeline.

  Fix: `Estimator.evaluate` has no `num_eval_steps` keyword; the parameter is
  `steps`, so the original --run_once path raised TypeError.
  """
  flags.mark_flag_as_required('train_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # NOTE(review): the strings below are placeholders meant to be replaced
  # with real paths (or switched back to the FLAGS-based lines).
  # config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  config = tf.estimator.RunConfig(
      'path where the training output files will be generated')
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      # pipeline_config_path=FLAGS.pipeline_config_path,
      pipeline_config_path='path of the model config file',
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn,
                         steps=None,  # evaluate the full eval dataset
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(_):
  """Trains a model, resuming evaluation from checkpoints when requested.

  Configuration comes from the module-level TRAINER_CONFIG dict and the
  parsed command-line `args`.

  :param _: unused parameter.
  :return: void.
  """
  ckpt_dir = TRAINER_CONFIG["checkpoints_dir"]
  run_config = tf.estimator.RunConfig(model_dir=ckpt_dir)
  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(TRAINER_CONFIG["hparams_override"]),
      pipeline_config_path=TRAINER_CONFIG["pipeline_config_path"],
      train_steps=TRAINER_CONFIG["num_train_steps"],
      sample_1_of_n_eval_examples=TRAINER_CONFIG["sample_1_of_n_eval_examples"],
      sample_1_of_n_eval_on_train_examples=(
          TRAINER_CONFIG["sample_1_of_n_eval_on_train_example"]))

  estimator = built["estimator"]
  train_steps = built["train_steps"]

  # Evaluate from existing checkpoints when requested and any exist.
  if args.checkpoints and len(os.listdir(ckpt_dir)) > 1:
    if TRAINER_CONFIG["eval_training_data"]:
      name, input_fn = "training_data", built["eval_on_train_input_fn"]
    else:
      name, input_fn = "validation_data", built["eval_input_fns"][0]
    if args.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(ckpt_dir))
    else:
      model_lib.continuous_eval(estimator, ckpt_dir, input_fn, train_steps,
                                name)
  else:
    # Train model from scratch.
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        built["train_input_fn"],
        built["eval_input_fns"],
        built["eval_on_train_input_fn"],
        built["predict_input_fn"],
        train_steps,
        eval_on_train_data=TRAINER_CONFIG["eval_training_data"])
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Train-and-evaluate driver (eval_steps-era API) with explicit flag checks.

  Fix: the missing-model_dir error message previously said 'mode_dir'.
  """
  # Explicit validation (instead of flags.mark_flag_as_required) so a
  # missing value surfaces as a ValueError.
  if not FLAGS.model_dir:
    raise ValueError('You must supply the model_dir')
  if not FLAGS.pipeline_config_path:
    raise ValueError('You must supply the pipeline_config_path')

  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      input_fn = eval_input_fn
    if FLAGS.run_once:
      estimator.evaluate(input_fn,
                         eval_steps,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                                eval_steps, train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fn,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Legacy TPU driver using tpu_config.RunConfig and the eval_steps API."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  resolver = tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
      tpu_names=[FLAGS.tpu_name], zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)
  master = resolver.get_master()
  run_config = tpu_config.RunConfig(
      master=master,
      evaluation_master=master,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))

  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps,
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      batch_size=FLAGS.train_batch_size)

  estimator = built['estimator']
  train_steps = built['train_steps']
  eval_steps = built['eval_steps']

  if FLAGS.mode == 'train':
    estimator.train(input_fn=built['train_input_fn'], max_steps=train_steps)

  # Continuous evaluation of checkpoints in model_dir.
  if FLAGS.mode == 'eval':
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', built['eval_on_train_input_fn']
    else:
      name, input_fn = 'validation_data', built['eval_input_fn']
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                              eval_steps, train_steps, name)
def main(unused_argv):
  """Single-GPU train/eval driver (multi-GPU MirroredStrategy left as a note)."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # For multi-GPU training, build a MirroredStrategy and pass it as
  # train_distribute, e.g.:
  #   strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=4)
  #   run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
  #                                       train_distribute=strategy)
  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = built['estimator']
  train_steps = built['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', built['eval_on_train_input_fn']
    else:
      # Only the first eval input is evaluated.
      name, input_fn = 'validation_data', built['eval_input_fns'][0]
    if FLAGS.run_once:
      estimator.evaluate(
          input_fn, steps=None,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        built['train_input_fn'],
        built['eval_input_fns'],
        built['eval_on_train_input_fn'],
        built['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    # Only a single EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Train/eval with pipeline-config overrides applied programmatically."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  # Load the pipeline config, override the SSD settings from flags, and hand
  # the resulting configs dict to create_estimator_and_inputs in place of a
  # file path.
  configs = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)
  configs['model'].ssd.freeze_batchnorm = FLAGS.freeze_batchnorm
  configs['model'].ssd.num_classes = FLAGS.num_classes

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      pipeline_config_path=configs,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = built['estimator']
  train_steps = built['train_steps']

  if FLAGS.checkpoint_dir:
    if FLAGS.eval_training_data:
      name, input_fn = 'training_data', built['eval_on_train_input_fn']
    else:
      # Only the first eval input is evaluated.
      name, input_fn = 'validation_data', built['eval_input_fns'][0]
    if FLAGS.run_once:
      estimator.evaluate(
          input_fn, steps=None,
          checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name, FLAGS.max_eval_retries)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        built['train_input_fn'],
        built['eval_input_fns'],
        built['eval_on_train_input_fn'],
        built['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    # Only a single EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(_):
  """Train/eval driven entirely by settings loaded from system_dict.json."""
  with open('system_dict.json') as fh:
    settings = json.load(fh)

  run_config = tf.estimator.RunConfig(model_dir=settings["model_dir"])
  built = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      pipeline_config_path=settings["pipeline_config_path"],
      train_steps=settings["num_train_steps"],
      sample_1_of_n_eval_examples=settings["sample_1_of_n_eval_examples"],
      sample_1_of_n_eval_on_train_examples=(
          settings["sample_1_of_n_eval_on_train_examples"]))
  estimator = built['estimator']
  train_steps = built['train_steps']

  if settings["checkpoint_dir"]:
    if settings["eval_training_data"]:
      name, input_fn = 'training_data', built['eval_on_train_input_fn']
    else:
      # Only the first eval input is evaluated.
      name, input_fn = 'validation_data', built['eval_input_fns'][0]
    if settings["run_once"]:
      estimator.evaluate(
          input_fn, steps=None,
          checkpoint_path=tf.train.latest_checkpoint(
              settings["checkpoint_dir"]))
    else:
      model_lib.continuous_eval(estimator, settings["checkpoint_dir"],
                                input_fn, train_steps, name,
                                settings["max_eval_retries"])
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        built['train_input_fn'],
        built['eval_input_fns'],
        built['eval_on_train_input_fn'],
        built['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Default detection driver: evaluate checkpoints or train-and-evaluate."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  d = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator, train_steps = d['estimator'], d['train_steps']

  if not FLAGS.checkpoint_dir:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        d['train_input_fn'],
        d['eval_input_fns'],
        d['eval_on_train_input_fn'],
        d['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    # Only one EvalSpec is currently supported.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    return

  # Evaluation-only path against FLAGS.checkpoint_dir.
  if FLAGS.eval_training_data:
    name, input_fn = 'training_data', d['eval_on_train_input_fn']
  else:
    # Only the first eval input is evaluated.
    name, input_fn = 'validation_data', d['eval_input_fns'][0]
  if FLAGS.run_once:
    estimator.evaluate(
        input_fn, steps=None,
        checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
  else:
    model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                              train_steps, name)
def main(_):
  """Trains or evaluates an object-detection model.

  Paths in FLAGS are interpreted relative to the current working directory.
  If FLAGS.checkpoint_dir is set, runs evaluation only; otherwise runs the
  standard train-and-evaluate loop.
  """
  model_dir = os.path.join(os.getcwd(), FLAGS.model_dir)
  pipeline_config_path = os.path.join(os.getcwd(), FLAGS.pipeline_config_path)
  num_train_steps = FLAGS.num_train_steps
  config = tf.estimator.RunConfig(model_dir=model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=pipeline_config_path,
      train_steps=num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      # BUG FIX: tf.estimator.Estimator.evaluate() has no `num_eval_steps`
      # keyword argument; passing it raised TypeError at runtime. The
      # correct keyword is `steps` (None = evaluate until input exhausted).
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Trains or evaluates an object-detection Estimator per FLAGS.

  With FLAGS.checkpoint_dir set, only evaluates checkpoints (once or
  continuously); otherwise trains with interleaved evaluation.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')

  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  bundle = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = bundle['estimator']
  train_steps = bundle['train_steps']

  if not FLAGS.checkpoint_dir:
    # Training mode.
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        bundle['train_input_fn'],
        bundle['eval_input_fns'],
        bundle['eval_on_train_input_fn'],
        bundle['predict_input_fn'],
        train_steps,
        eval_on_train_data=False)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
    return

  # Evaluation-only mode.
  if FLAGS.eval_training_data:
    name = 'training_data'
    input_fn = bundle['eval_on_train_input_fn']
  else:
    name = 'validation_data'
    input_fn = bundle['eval_input_fns'][0]

  if FLAGS.run_once:
    estimator.evaluate(
        input_fn,
        steps=None,
        checkpoint_path=tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
  else:
    model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                              train_steps, name)
def main(unused_argv):
  """Trains or evaluates an object-detection model.

  NOTE(review): configuration comes from module-level names (model_dir,
  pipeline_config_path, num_train_steps, checkpoint_dir, run_once,
  max_eval_retries, ...) presumably defined earlier in this file -- none of
  them are parameters or FLAGS here; verify they exist at module scope.
  """
  config = tf.estimator.RunConfig(model_dir=model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(hparams_overrides),
      pipeline_config_path=pipeline_config_path,
      train_steps=num_train_steps,
      sample_1_of_n_eval_examples=sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if checkpoint_dir:
    # Evaluation-only mode.
    if eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      ## The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if run_once:
      estimator.evaluate(
          input_fn,
          steps=None,
          checkpoint_path=tf.train.latest_checkpoint(checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, checkpoint_dir, input_fn,
                                train_steps, name, max_eval_retries)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    ## Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Active-learning loop for video object detection.

  Per cycle: (1) run the current detector on still-unlabeled, verified
  frames to collect detections, (2) select new videos to label using the
  strategy encoded in FLAGS.name (Rnd / Ent / Lst / TCFP / FP_gt / FN_gt /
  FPN), (3) retrain on the enlarged active set and persist it so the run
  can be restarted.

  NOTE(review): relies on module-level helpers/globals (get_dataset,
  data_info, sel, save_tf_record, pickle, pdb, ...) defined elsewhere in
  this file. Several chunks below are disabled by wrapping them in bare
  triple-quoted string literals (no-op expression statements) -- they are
  kept verbatim. `detected_boxes` / `groundtruth_boxes` may be unbound on
  some paths (e.g. selection strategies when detections were not loaded)
  -- TODO confirm against the full file.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  #config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  # Get info about full dataset
  dataset,videos = get_dataset(data_info)
  # Get experiment information from FLAGS
  name = FLAGS.name
  num_cycles = int(FLAGS.cycles)
  run_num = int(FLAGS.run)
  #num_steps = str(train_config.num_steps)
  epochs = int(FLAGS.epochs)
  restart_cycle = int(FLAGS.restart_from_cycle)
  if FLAGS.checkpoint_dir:
    # NOTE(review): this branch is effectively a no-op -- its entire body
    # is the string literal below (disabled evaluation code).
    """
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, num_eval_steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
    """
  else:
    # Load active set from cycle 0 and point to right model
    if restart_cycle==0:
      model_dir = FLAGS.model_dir + 'R' + str(run_num) + 'cycle0/'
      #train_config.fine_tune_checkpoint = model_dir + 'model.ckpt'
    else:
      model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(restart_cycle) + '/'
      # Get actual checkpoint model
      #with open(model_dir+'checkpoint','r') as cfile:
      #  line = cfile.readlines()
      #  train_config.fine_tune_checkpoint = line[0].split(' ')[1][1:-2]
    active_set = []
    unlabeled_set=[]
    # Restore the labeled ("active") frame indices saved by a prior run.
    with open(model_dir + 'active_set.txt', 'r') as f:
      for line in f:
        active_set.append(int(line))
    for cycle in range(restart_cycle+1,num_cycles+1):
      #### Evaluation of trained model on unlabeled set to obtain data for selection
      # Random selection needs no detections; the last cycle only retrains.
      if 'Rnd' not in name and cycle < num_cycles:
        eval_train_dir = model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + 'eval_train/'
        if os.path.exists(eval_train_dir + 'detections.dat'):
          # Detections were already computed for this cycle -- reuse them.
          with open(eval_train_dir + 'detections.dat','rb') as infile:
            ###### pdb remove latinq
            detected_boxes = pickle.load(infile)
            #detected_boxes = pickle.load(infile,encoding='latin1')
        else:
          # Get unlabeled set
          data_info['output_path'] = FLAGS.data_dir + 'AL/tfrecords/' + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '_unlabeled.record'
          # Do not evaluate labeled samples, their neighbors or unverified frames
          aug_active_set = sel.augment_active_set(dataset,videos,active_set,num_neighbors=5)
          unlabeled_set = [f['idx'] for f in dataset if f['idx'] not in aug_active_set and f['verified']]
          # For TCFP, we need to get detections for pretty much every frame,
          # as not candidates can may be used to support candidates
          if ('TCFP' in name):
            unlabeled_set = [i for i in range(len(dataset))]
          print('Unlabeled frames in the dataset: {}'.format(len(unlabeled_set)))
          save_tf_record(data_info,unlabeled_set)
          # NOTE(review): disabled code preserved as a string literal (no-op).
          """
          configs = config_util.get_configs_from_pipeline_file(pipeline_config_path=FLAGS.pipeline_config_path, config_override=None)
          eval_input_configs = configs['eval_input_configs']
          eval_config = configs['eval_config']
          model_config = configs['model']
          eval_input_configs = configs['eval_input_configs']
          MODEL_BUILD_UTIL_MAP = {'create_eval_input_fn': inputs.create_eval_input_fn}
          create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
          eval_input_fns = [create_eval_input_fn(
              eval_config=eval_config,
              eval_input_config=eval_input_config,
              model_config=model_config) for eval_input_config in eval_input_configs]
          """
          # Set number of eval images to number of unlabeled samples and point to tfrecord
          #eval_input_config.tf_record_input_reader.input_path[0] = data_info['output_path']
          #eval_config.num_examples = len(unlabeled_set)
          model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '/'
          config = tf.estimator.RunConfig(model_dir=model_dir)
          train_and_eval_dict = model_lib.create_estimator_and_inputs(
              Unlabeled_set_length=len(unlabeled_set),
              Active_set_length=len(active_set),
              epochs=epochs,
              data_info=data_info,
              FLAGS=FLAGS,
              restart_cycle=restart_cycle,
              run_config=config,
              hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
              pipeline_config_path=FLAGS.pipeline_config_path,
              train_steps=FLAGS.num_train_steps,
              sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
              sample_1_of_n_eval_on_train_examples=(FLAGS.sample_1_of_n_eval_on_train_examples))
          estimator = train_and_eval_dict['estimator']
          train_input_fn = train_and_eval_dict['train_input_fn']
          eval_input_fns = train_and_eval_dict['eval_input_fns']
          eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
          predict_input_fn = train_and_eval_dict['predict_input_fn']
          train_steps = train_and_eval_dict['train_steps']
          # NOTE(review): disabled code preserved as a string literal (no-op).
          """
          train_spec, eval_specs = model_lib.create_train_and_eval_specs(
              train_input_fn,
              eval_input_fns,
              eval_on_train_input_fn,
              predict_input_fn,
              train_steps,
              eval_on_train_data=False)
          def get_next_eval_train(config):
            return dataset_builder.make_initializable_iterator(
                dataset_builder.build(config)).get_next()
          # Initialize input dict again (necessary?)
          #create_eval_train_input_dict_fn = functools.partial(get_next_eval_train, eval_input_config)
          graph_rewriter_fn = None
          if 'graph_rewriter_config' in configs:
            graph_rewriter_fn = graph_rewriter_builder.build(
                configs['graph_rewriter_config'], is_training=False)
          # Need to reset graph for evaluation
          tf.reset_default_graph()
          #if FLAGS.eval_training_data:
          #name = 'evaluation_of_training_data'
          #input_fn = eval_on_train_input_fn
          #else:
          # name = 'validation_data'
          # # The first eval input will be evaluated.
          """
          input_fn = eval_input_fns[0]
          if FLAGS.run_once:
            # NOTE(review): `detected_boxes` is expected to come out of this
            # evaluation via side channels -- the returned metrics dict is
            # bound to `predictions`; confirm how detections reach the
            # pickle.dump below.
            predictions=estimator.evaluate(input_fn, checkpoint_path=tf.train.latest_checkpoint(eval_train_dir))
          else:
            pdb.set_trace()  # debugging breakpoint left in by the author
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                      train_steps, name)
          pdb.set_trace()  # debugging breakpoint left in by the author
          #visualize_detections(dataset, unlabeled_set, detected_boxes, groundtruth_boxes)
          # Cache the detections so a restarted run skips this evaluation.
          with open(eval_train_dir + 'detections.dat','wb') as outfile:
            pickle.dump(detected_boxes,outfile, protocol=pickle.HIGHEST_PROTOCOL)
          print('Done computing detections in training set')
          # Remove tfrecord used for training
          if os.path.exists(data_info['output_path']):
            os.remove(data_info['output_path'])
      #### Training of current cycle
      model_dir = FLAGS.model_dir + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '/'
      config = tf.estimator.RunConfig(model_dir=model_dir)
      # Budget for each cycle is the number of videos (0.5% of train set)
      if ('Rnd' in name):
        #indices = select_random_video(dataset,videos,active_set)
        #indices = sel.select_random(dataset,videos,active_set,budget=num_videos)
        indices = sel.select_random_video(dataset,videos,active_set)
      else:
        # Strategy dispatch on substring of the experiment name.
        if ('Ent' in name):
          indices = sel.select_entropy_video(dataset,videos,FLAGS.data_dir,active_set,detected_boxes)
        elif ('Lst' in name):
          indices = sel.select_least_confident_video(dataset,videos,active_set,detected_boxes)
        elif ('TCFP' in name):
          indices = sel.select_TCFP_per_video(dataset,videos,FLAGS.data_dir,active_set,detected_boxes)
        elif ('FP_gt' in name):
          indices = sel.selectFpPerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)
        elif ('FN_gt' in name):
          indices = sel.selectFnPerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)
        elif ('FPN' in name):
          indices = sel.select_FPN_PerVideo(dataset,videos,active_set,detected_boxes,groundtruth_boxes,cycle)
      active_set.extend(indices)
      data_info['output_path'] = FLAGS.data_dir + 'AL/tfrecords/' + name + 'R' + str(run_num) + 'cycle' + str(cycle) + '.record'
      save_tf_record(data_info,active_set)
      pdb.set_trace()  # debugging breakpoint left in by the author
      train_and_eval_dict = model_lib.create_estimator_and_inputs(
          Unlabeled_set_length=len(unlabeled_set),
          Active_set_length=len(active_set),
          epochs=epochs,
          data_info=data_info,
          FLAGS=FLAGS,
          restart_cycle=restart_cycle,
          run_config=config,
          hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
          pipeline_config_path=FLAGS.pipeline_config_path,
          train_steps=FLAGS.num_train_steps,
          sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
          sample_1_of_n_eval_on_train_examples=(FLAGS.sample_1_of_n_eval_on_train_examples))
      estimator = train_and_eval_dict['estimator']
      train_input_fn = train_and_eval_dict['train_input_fn']
      eval_input_fns = train_and_eval_dict['eval_input_fns']
      eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
      predict_input_fn = train_and_eval_dict['predict_input_fn']
      train_steps = train_and_eval_dict['train_steps']
      train_spec, eval_specs = model_lib.create_train_and_eval_specs(
          train_input_fn,
          eval_input_fns,
          eval_on_train_input_fn,
          predict_input_fn,
          train_steps,
          eval_on_train_data=False)
      print('-----------------train and evaluation-------------------------')
      # Currently only a single Eval Spec is allowed.
      tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
      #Save active_set in train dir in case we want to restart training
      with open(model_dir + 'active_set.txt', 'w') as f:
        for item in active_set:
          f.write('{}\n'.format(item))
      # Remove tfrecord used for training
      if os.path.exists(data_info['output_path']):
        os.remove(data_info['output_path'])
def main(unused_argv):
  """Trains or evaluates an object-detection model, with verbose debugging.

  Prints every relevant FLAG and the working-directory contents before
  running, and validates the input data referenced by the pipeline config.

  NOTE(review): tf.enable_eager_execution() is called before building a
  tf.estimator.Estimator -- Estimators build graphs, so mixing the two is
  unusual; confirm this is intentional and actually works in the TF
  version in use.
  """
  print("*** train.py/main()")
  # flags.mark_flag_as_required('model_dir')
  # flags.mark_flag_as_required('pipeline_config_path')
  print('*** FLAGS ***')
  print("pipeline_config_path:", FLAGS.pipeline_config_path)
  ## --verification - debug
  print("config exists:", os.path.exists(FLAGS.pipeline_config_path))
  dir_list = [f for f in listdir(".")]
  for item in dir_list:
    print("file:", item)
  print("model_dir:", FLAGS.model_dir)
  print("train:", FLAGS.train)
  print("val:", FLAGS.val)
  print("sample_1_of_n_eval_examples:", FLAGS.sample_1_of_n_eval_examples)
  print("hparams_overrides:", FLAGS.hparams_overrides)
  print("checkpoint_dir:", FLAGS.checkpoint_dir)
  # check pipeline config pararameters
  # - input data
  pipeline_config_dict = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)
  check_input_data_existance(pipeline_config_dict)
  print(" - - - - - - - - -")
  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  tf.enable_eager_execution()
  tf.set_random_seed(0)  # fixed seed for reproducibility
  tf.logging.set_verbosity(tf.logging.ERROR)
  # Creates `Estimator`, input functions, and steps
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  # so here are the outputs (that were in a dict)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Multi-GPU (Horovod) training/evaluation with optional AMP and XLA.

  Each Horovod rank pins one GPU; only rank 0 writes checkpoints and runs
  evaluation. Training is chopped into FLAGS.eval_count segments with an
  evaluation after each segment.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  # Automatic mixed precision is toggled via the TF environment variable.
  if FLAGS.amp:
    os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1"
  else:
    os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "0"
  # Set seed to reduce randomness
  np.random.seed(FLAGS.seed)
  tf.set_random_seed(FLAGS.seed)
  hvd.init()
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  session_config = tf.ConfigProto()
  session_config.gpu_options.per_process_gpu_memory_fraction=0.9
  # One GPU per process, selected by Horovod local rank.
  session_config.gpu_options.visible_device_list = str(hvd.local_rank())
  if FLAGS.allow_xla:
    session_config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
  # Only rank 0 writes checkpoints/summaries; other ranks get no model_dir.
  model_dir = FLAGS.model_dir if hvd.rank() == 0 else None
  # Per-rank seed offset so data shuffling differs across workers.
  config = tf.estimator.RunConfig(tf_random_seed=(FLAGS.seed + hvd.rank()),
                                  model_dir=model_dir,
                                  session_config=session_config)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      eval_count=FLAGS.eval_count,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Broadcast initial variables from rank 0; log throughput via DLLogger.
    train_hooks = [hvd.BroadcastGlobalVariablesHook(0),
                   DLLoggerHook(hvd.size()*train_and_eval_dict['train_batch_size'],
                                hvd.rank())]
    eval_hooks = []
    # Alternate train/eval segments instead of train_and_evaluate().
    for x in range(FLAGS.eval_count):
      estimator.train(train_input_fn,
                      hooks=train_hooks,
                      steps=train_steps // FLAGS.eval_count)
      if hvd.rank() == 0 and not FLAGS.train_only:
        eval_input_fn = eval_input_fns[0]
        results = estimator.evaluate(eval_input_fn,
                                     steps=None,
                                     hooks=eval_hooks)
def main(unused_argv):
  """TPU training/evaluation entry point, selected by FLAGS.mode.

  Modes: 'train' trains to completion; 'eval' continuously evaluates
  checkpoints in model_dir; 'train_and_eval' alternates training segments
  with a background GPU evaluation subprocess.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  tpu_cluster_resolver = (
      tf.contrib.cluster_resolver.TPUClusterResolver(
          tpu=FLAGS.tpu_name.split(','),
          zone=FLAGS.tpu_zone,
          project=FLAGS.gcp_project))
  tpu_grpc_url = tpu_cluster_resolver.get_master()
  config = tf.contrib.tpu.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards),
      save_checkpoints_steps=FLAGS.eval_every_n_iters)
  # Optional train batch-size override, forwarded as **kwargs.
  kwargs = {}
  if FLAGS.train_batch_size:
    kwargs['batch_size'] = FLAGS.train_batch_size
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples),
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.mode == 'train':
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  # Continuously evaluating.
  if FLAGS.mode == 'eval':
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # Currently only a single eval input is allowed.
      input_fn = eval_input_fns[0]
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn,
                              train_steps, name)
  if FLAGS.mode == 'train_and_eval':
    # load last checkpoint and start from there
    current_step = load_global_step_from_checkpoint_dir(FLAGS.model_dir)
    while current_step < train_steps:
      # Train for up to steps_per_eval number of steps.
      # At the end of training, a checkpoint will be written to --model_dir.
      next_checkpoint = min(current_step + FLAGS.eval_every_n_iters,
                            train_steps)
      estimator.train(input_fn=train_input_fn, max_steps=next_checkpoint)
      current_step = next_checkpoint
      # Evaluate the model on the most recent model in --model_dir.
      # Since evaluation happens in batches of --eval_batch_size, some images
      # may be excluded modulo the batch size. As long as the batch size is
      # consistent, the evaluated images are also consistent.
      tf.logging.info('Starting to evaluate at step %d', next_checkpoint)
      # Background evaluation process.
      # NOTE(review): shell=True with a string-joined command; all pieces
      # come from FLAGS, but treat as trusted input only.
      args = [
          'PYTHONPATH=$PYTHONPATH:slim', 'python',
          'object_detection/model_main.py',
          '--pipeline_config_path', FLAGS.pipeline_config_path,
          '--model_dir', FLAGS.model_dir,
          '--checkpoint_dir', FLAGS.model_dir,
          '--sample_1_of_n_eval_examples',
          str(FLAGS.sample_1_of_n_eval_examples),
          '--gpu_memory_fraction', str(0.2),
          '--run_once', str(True)
      ]
      print(' '.join(args))
      p = subprocess.Popen(' '.join(args), stderr=subprocess.STDOUT,
                           shell=True)
def main(unused_argv):
  """Trains or evaluates a model with a custom checkpoint-saving cadence.

  Identical to the stock model_main flow except the RunConfig saves a
  checkpoint every 1000 steps and keeps all of them.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  # config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  # config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
  #                                 save_checkpoints_steps=5000)
  config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir,
      #save_checkpoints_steps=5000,
      save_checkpoints_steps=1000,
      save_checkpoints_secs=None,  # step-based, not time-based, saving
      keep_checkpoint_max= None,  # useless: model_lib.py tf.train.Saver-> max_to_keep take effect
      keep_checkpoint_every_n_hours=10000)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  # import pdb
  # pdb.set_trace()
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
  """Trains or evaluates a model, throttling evaluation to once per hour.

  Same as the stock flow, except the generated EvalSpec is rebuilt with an
  explicit throttle_secs/start_delay_secs to space out evaluations.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Currently only a single Eval Spec is allowed.
    # throttle_secs is not documented in eval.proto. This replaces eval_interval_secs somewhat
    throttle_secs = 60 * 60  # every 60 min
    eval_spec = eval_specs[0]
    # Rebuild the EvalSpec to inject the throttle/delay settings, keeping
    # the generated name, input_fn and exporters.
    my_eval_spec = tf.estimator.EvalSpec(
        name=eval_spec.name,
        input_fn=eval_spec.input_fn,
        steps=None,
        exporters=eval_spec.exporters,
        start_delay_secs=1800,  # 30 minutes - does not seem to be respected...
        throttle_secs=throttle_secs)
    print('=========== my_eval_spec')
    print(my_eval_spec)
    print('=========================')
    tf.estimator.train_and_evaluate(estimator, train_spec, my_eval_spec)
def main(unused_argv):
  """Multi-GPU (Horovod) training/evaluation entry point.

  Each rank pins one GPU; only rank 0 keeps a model_dir (checkpoints) and
  evaluates. Training runs in FLAGS.eval_count segments with an evaluation
  on rank 0 after each segment.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  hvd.init()
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  session_config = tf.ConfigProto()
  session_config.gpu_options.allow_growth = True
  # One GPU per process, selected by Horovod local rank.
  session_config.gpu_options.visible_device_list = str(hvd.local_rank())
  if FLAGS.allow_xla:
    session_config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
  # Only rank 0 writes checkpoints/summaries.
  model_dir = FLAGS.model_dir if hvd.rank() == 0 else None
  config = tf.estimator.RunConfig(model_dir=model_dir,
                                  session_config=session_config)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      eval_count=FLAGS.eval_count,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Sync initial variables across ranks before training.
    train_hooks = [hvd.BroadcastGlobalVariablesHook(0)]
    eval_hooks = []
    # Alternate train/eval segments instead of train_and_evaluate().
    for x in range(FLAGS.eval_count):
      estimator.train(train_input_fn,
                      hooks=train_hooks,
                      steps=train_steps // FLAGS.eval_count)
      if hvd.rank() == 0:
        eval_input_fn = eval_input_fns[0]
        results = estimator.evaluate(eval_input_fn,
                                     steps=None,
                                     hooks=eval_hooks)
def main(unused_argv):
  """Trains with per-checkpoint evaluation via CheckpointSaverListeners.

  Instead of tf.estimator.train_and_evaluate, this runs estimator.train
  with EvalCheckpointSaverListener instances so every saved checkpoint is
  evaluated on validation (and, when available, training) data.
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  # Optionally pin the process to a single GPU chosen by flag.
  if FLAGS.gpu_device is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_device)
  session_config = tf.ConfigProto()
  session_config.gpu_options.allow_growth = True
  config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir,
      session_config=session_config,
      save_checkpoints_secs=FLAGS.save_checkpoints_secs)
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
      sample_1_of_n_eval_on_train_examples=(
          FLAGS.sample_1_of_n_eval_on_train_examples))
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fns = train_and_eval_dict['eval_input_fns']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  if FLAGS.checkpoint_dir:
    # Evaluation-only mode.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      # The first eval input will be evaluated.
      input_fn = eval_input_fns[0]
    if FLAGS.run_once:
      estimator.evaluate(input_fn, steps=None,
                         checkpoint_path=tf.train.latest_checkpoint(
                             FLAGS.checkpoint_dir))
    else:
      model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                train_steps, name, FLAGS.max_eval_retries)
  else:
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)
    # Multiple Eval Specs allowed.
    # TODO: Fix name of saving_listeners
    saving_listeners = [
        EvalCheckpointSaverListener(estimator, eval_specs[0].input_fn,
                                    'validation')
    ]
    if len(eval_specs) > 1:
      saving_listeners.append(
          EvalCheckpointSaverListener(estimator, eval_specs[1].input_fn,
                                      'training'))
    estimator.train(input_fn=train_spec.input_fn,
                    max_steps=train_spec.max_steps,
                    saving_listeners=saving_listeners)
def main(unused_argv):
    """Train and evaluate an object-detection model with fixed GPU tuning.

    Configures a session with soft device placement and a hardcoded GPU
    selection/memory budget, builds the estimator and input functions from
    the pipeline config, then either evaluates an existing checkpoint
    directory (`--checkpoint_dir`) or runs the standard
    `tf.estimator.train_and_evaluate` loop.
    """
    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    # NOTE(review): GPU memory fraction and device "1" are hardcoded for a
    # specific machine — confirm this is the intended card before reuse.
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.95
    session_config.gpu_options.visible_device_list = "1"

    log_step_cnt = 10  # log global_step/sec every 10 steps
    save_checkpoints_steps = 25000

    tf.logging.set_verbosity(tf.logging.INFO)
    config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        session_config=session_config,
        log_step_count_steps=log_step_cnt)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name = 'training_data'
            input_fn = eval_on_train_input_fn
        else:
            name = 'validation_data'
            # The first eval input will be evaluated.
            input_fn = eval_input_fns[0]
        if FLAGS.run_once:
            # Fix: the steps argument had been left as dead commented-out
            # code; pass steps=None explicitly (evaluate the full eval set),
            # matching Estimator.evaluate's signature and sibling variants.
            estimator.evaluate(
                input_fn,
                steps=None,
                checkpoint_path=tf.train.latest_checkpoint(
                    FLAGS.checkpoint_dir))
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
    else:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)

        # Currently only a single Eval Spec is allowed.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
def main(unused_argv):
    """Train/evaluate an object-detection model with CSV + file logging.

    Sets up a log directory under the model dir, routes the TensorFlow
    logger through a CSV formatter plus file and console handlers (filtered
    by `StepLossFilter`), runs training (or checkpoint evaluation), and
    finally logs wall-clock start/end/duration in Singapore time.
    """
    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    current_time_start = datetime.now(tz_SG).strftime('%d-%m-%Y %H:%M:%S')
    start = time.time()

    # Make directory for log files.
    log_directory = os.path.join(os.getcwd(), FLAGS.model_dir)
    if not os.path.exists(log_directory):
        os.makedirs(log_directory)
    print("Logging will be found in {}".format(log_directory))
    log_file = os.path.join(log_directory, 'log.txt')

    # Create file handler which logs event debug messages.
    log = logging.getLogger('tensorflow')
    # Fix: guard the root-handler access — depending on the TF/absl version
    # the root logger may have no handlers yet, and handlers[0] would raise
    # IndexError before training even starts.
    if log.root.handlers:
        log.root.handlers[0].setFormatter(
            CsvFormatter(output_csv=os.path.join(log_directory, 'log.csv')))
    log.addFilter(StepLossFilter())

    # Grow GPU memory on demand instead of reserving it all up front.
    config2 = tf.ConfigProto()
    config2.gpu_options.allow_growth = True

    log.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')

    # FileHandler is used to send the log entries to a file.
    fh = logging.FileHandler(log_file)
    print("File handler: {}".format(fh))
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    # StreamHandler is used to send the log entries to console.
    ch = logging.StreamHandler()
    ch.addFilter(StepLossFilter())
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    log.addHandler(ch)

    # Log the estimator steps.
    config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir,
                                    save_checkpoints_steps=500,
                                    log_step_count_steps=100,
                                    session_config=config2)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name = 'training_data'
            input_fn = eval_on_train_input_fn
        else:
            name = 'validation_data'
            # The first eval input will be evaluated.
            input_fn = eval_input_fns[0]
        if FLAGS.run_once:
            estimator.evaluate(
                input_fn,
                steps=None,
                checkpoint_path=tf.train.latest_checkpoint(
                    FLAGS.checkpoint_dir))
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
    else:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)

        # Currently only a single Eval Spec is allowed.
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])

    end = time.time()
    current_time_end = datetime.now(tz_SG).strftime('%d-%m-%Y %H:%M:%S')
    log.info("Started: {}".format(current_time_start))
    log.info("Ended: {}".format(current_time_end))
    log.info("Duration: {} secs".format(round(end - start, 0)))
def main(unused_argv):
    """Train/evaluate an object-detection model on Azure ML.

    First substitutes AML data-reference mount points into the pipeline
    config, then either evaluates an existing checkpoint directory or runs
    `tf.estimator.train_and_evaluate` with a rebuilt EvalSpec that uses a
    60-minute eval throttle.
    """
    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    # Substitute the AML data reference mount points for relevant parts in
    # the pipeline.config and overwrite.
    with open(FLAGS.pipeline_config_path) as f:
        config_file = f.read()
    new_config_file = config_file.replace(
        '$AZUREML_DATAREFERENCE_tfrecords',
        actual_path_tfrecords).replace('$AZUREML_DATAREFERENCE_artifacts',
                                       actual_path_artifacts)
    # Fix: only rewrite the file when a substitution actually changed it —
    # the original overwrote unconditionally on every run, which is
    # non-idempotent (touches mtime, risks truncation on a crashed write).
    if new_config_file != config_file:
        with open(FLAGS.pipeline_config_path, 'w') as f:
            f.write(new_config_file)
    print('model_main.py, main(), finished substituting mount points.')

    config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        # Save less often than default - 1/5 of an epoch.
        save_checkpoints_steps=104012)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if FLAGS.checkpoint_dir:
        if FLAGS.eval_training_data:
            name = 'training_data'
            input_fn = eval_on_train_input_fn
        else:
            name = 'validation_data'
            # The first eval input will be evaluated.
            input_fn = eval_input_fns[0]
        if FLAGS.run_once:
            estimator.evaluate(
                input_fn,
                steps=None,
                checkpoint_path=tf.train.latest_checkpoint(
                    FLAGS.checkpoint_dir))
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name)
    else:
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)

        # Currently only a single Eval Spec is allowed.
        # throttle_secs is not documented in eval.proto. This replaces
        # eval_interval_secs somewhat.
        throttle_secs = 60 * 60  # every 60 min
        eval_spec = eval_specs[0]
        my_eval_spec = tf.estimator.EvalSpec(
            name=eval_spec.name,
            input_fn=eval_spec.input_fn,
            steps=None,
            exporters=eval_spec.exporters,
            # 30 minutes - does not seem to be respected...
            start_delay_secs=1800,
            throttle_secs=throttle_secs)
        print('=========== my_eval_spec')
        print(my_eval_spec)
        print('=========================')

        tf.estimator.train_and_evaluate(estimator, train_spec, my_eval_spec)