def __init__(self, config, dataset):
    self.config = config
    self.train_dir = config.train_dir
    log.info("self.train_dir = %s", self.train_dir)

    # --- input ops ---
    self.batch_size = config.batch_size
    self.dataset = dataset
    check_data_id(dataset, config.data_id)
    _, self.batch = create_input_ops(dataset, self.batch_size,
                                     data_id=config.data_id,
                                     is_training=False,
                                     shuffle=False)

    # --- create model ---
    Model = self.get_model_class(config.model)
    self.model = Model(config)

    self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
    self.step_op = tf.no_op(name='step_no_op')

    tf.set_random_seed(1234)

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True),
        device_count={'GPU': 1},
    )
    self.session = tf.Session(config=session_config)

    # --- checkpoint and monitoring ---
    self.saver = tf.train.Saver(max_to_keep=100)

    self.checkpoint_path = config.checkpoint_path
    if self.checkpoint_path is None and self.train_dir:
        self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)
    if self.checkpoint_path is None:
        log.warn("No checkpoint is given. Just random initialization :-)")
        self.session.run(tf.global_variables_initializer())
    else:
        log.info("Checkpoint path : %s", self.checkpoint_path)
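# A minimal evaluation-loop sketch added for illustration; it is NOT part of the
# original __init__. It assumes the pipeline built by create_input_ops is queue-based,
# and that the Model class exposes get_feed_dict(...) and an `accuracy` tensor --
# these names are assumptions, not confirmed API of this repository.
def eval_run(self, max_steps=100):
    # Restore parameters if a checkpoint was located in __init__.
    if self.checkpoint_path is not None:
        self.saver.restore(self.session, self.checkpoint_path)

    # Start the queue runners feeding self.batch, then evaluate batch by batch.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=self.session, coord=coord)
    accuracies = []
    try:
        for _ in range(max_steps):
            batch_chunk = self.session.run(self.batch)
            feed_dict = self.model.get_feed_dict(batch_chunk, is_training=False)  # hypothetical helper
            acc = self.session.run(self.model.accuracy, feed_dict=feed_dict)      # assumed tensor
            accuracies.append(acc)
    finally:
        coord.request_stop()
        coord.join(threads)
    log.info("Average accuracy over %d batches: %.4f",
             len(accuracies), sum(accuracies) / max(len(accuracies), 1))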
def __init__(self, config, dataset, dataset_test):
    self.config = config
    hyper_parameter_str = config.dataset + '_lr_' + str(config.learning_rate) + \
        '_update_G' + str(config.update_rate) + '_D' + str(1)
    self.train_dir = './train_dir/%s-%s-%s-%s' % (
        config.model,
        config.prefix,
        hyper_parameter_str,
        time.strftime("%Y%m%d-%H%M%S")
    )

    if not os.path.exists(self.train_dir):
        os.makedirs(self.train_dir)
    log.infov("Train Dir: %s", self.train_dir)

    # --- input ops ---
    self.batch_size = config.batch_size

    _, self.batch_train = create_input_ops(dataset, self.batch_size,
                                           is_training=True)
    _, self.batch_test = create_input_ops(dataset_test, self.batch_size,
                                          is_training=False)

    # --- create model ---
    Model = self.get_model_class(config.model)
    log.infov("Using Model class : %s", Model)
    self.model = Model(config)

    # --- optimizer ---
    self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
    self.learning_rate = config.learning_rate
    if config.lr_weight_decay:
        self.learning_rate = tf.train.exponential_decay(
            self.learning_rate,
            global_step=self.global_step,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True,
            name='decaying_learning_rate'
        )

    # Print all the trainable variables:
    # tf.contrib.slim.model_analyzer.analyze_vars(tf.trainable_variables(), print_info=True)
    # self.check_op = tf.add_check_numerics_ops()
    self.check_op = tf.no_op()

    # --- checkpoint and monitoring ---
    all_vars = tf.trainable_variables()

    d_var = [v for v in all_vars if v.name.startswith('Discriminator')]
    log.warn("********* d_var ********** ")
    slim.model_analyzer.analyze_vars(d_var, print_info=True)

    g_var = [v for v in all_vars if v.name.startswith('Generator')]
    log.warn("********* g_var ********** ")
    slim.model_analyzer.analyze_vars(g_var, print_info=True)

    # Every trainable variable must belong to either the generator or the discriminator.
    rem_var = (set(all_vars) - set(d_var) - set(g_var))
    print([v.name for v in rem_var])
    assert not rem_var

    self.d_optimizer = tf.contrib.layers.optimize_loss(
        loss=self.model.d_loss,
        global_step=self.global_step,
        learning_rate=self.learning_rate * 0.5,
        optimizer=tf.train.AdamOptimizer(beta1=0.5),
        clip_gradients=20.0,
        name='d_optimize_loss',
        variables=d_var
    )

    self.g_optimizer = tf.contrib.layers.optimize_loss(
        loss=self.model.g_loss,
        global_step=self.global_step,
        learning_rate=self.learning_rate,
        optimizer=tf.train.AdamOptimizer(beta1=0.5),
        clip_gradients=20.0,
        name='g_optimize_loss',
        variables=g_var
    )

    self.summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(max_to_keep=100)
    # Saver used to restore pre-trained parameters when config.checkpoint is given
    # (the original code referenced self.pretrain_saver without defining it).
    self.pretrain_saver = tf.train.Saver(var_list=all_vars, max_to_keep=1)
    self.summary_writer = tf.summary.FileWriter(self.train_dir)

    self.checkpoint_secs = 600  # 10 min

    self.supervisor = tf.train.Supervisor(
        logdir=self.train_dir,
        is_chief=True,
        saver=None,
        summary_op=None,
        summary_writer=self.summary_writer,
        save_summaries_secs=300,
        save_model_secs=self.checkpoint_secs,
        global_step=self.global_step,
    )

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(allow_growth=True),
        device_count={'GPU': 1},
    )
    self.session = self.supervisor.prepare_or_wait_for_session(config=session_config)

    self.ckpt_path = config.checkpoint
    if self.ckpt_path is not None:
        log.info("Checkpoint path: %s", self.ckpt_path)
        self.pretrain_saver.restore(self.session, self.ckpt_path)
        log.info("Loaded the pretrain parameters from the provided checkpoint path")
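# A single training-step sketch added for illustration; it is NOT part of the
# original __init__. It assumes the Model class exposes a get_feed_dict(batch_chunk)
# helper and that the discriminator is updated config.update_rate times per
# generator update -- both are assumptions about the surrounding code.
def run_single_step(self):
    # Fetch one batch produced by the training input pipeline built above.
    batch_chunk = self.session.run(self.batch_train)
    feed_dict = self.model.get_feed_dict(batch_chunk)  # hypothetical helper

    # Update the discriminator, possibly several times per generator step.
    for _ in range(self.config.update_rate):
        self.session.run([self.d_optimizer, self.check_op], feed_dict=feed_dict)

    # Update the generator and collect losses and summaries in one run call.
    _, _, global_step, summary, d_loss, g_loss = self.session.run(
        [self.g_optimizer, self.check_op, self.global_step, self.summary_op,
         self.model.d_loss, self.model.g_loss],
        feed_dict=feed_dict
    )
    self.summary_writer.add_summary(summary, global_step=global_step)
    return global_step, d_loss, g_loss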