import json
import logging

import tensorflow as tf

import data_loader
import utils

# log_path, model_param_path, summary_path, timeline_path, global_var,
# train_data_list_tpl, valid_data_list_tpl, encoder, decoder, get_loss and
# optimize are assumed to be defined at module level elsewhere.


def train(sess, args):
    config_path = 'model_{}/config.json'.format(args.model_num)
    with open(config_path, 'r') as f:
        config = json.load(f)
    patch_size = config['patch_size']
    batch_size = config['batch_size']
    num_steps = config['num_steps']
    quan_scale = config['quan_scale']
    bitrate_reg_decay = config['bitrate_reg_decay']

    overload_log = args.overload_log
    load_ckpt = args.load_ckpt
    reset_step = args.reset_step
    max_step = args.max_step
    lr_and_bound = args.lr_and_bound
    if max_step != 'None':
        num_steps = int(max_step)
        print('num_steps: ', num_steps)

    # Append to the existing log unless an overwrite is explicitly requested.
    if (reset_step == 'off') and (load_ckpt == 'on' or overload_log == 'off'):
        utils.set_logger(log_path)
        logging.info('Not overload_log')
    else:
        utils.set_logger(log_path, mode='w')
        logging.info('Overload_log')

    global global_var
    additional_param = args.additional_param
    if additional_param == '0':
        pass
    elif additional_param == '1':
        pass
    elif additional_param == '2':
        pass
    elif additional_param == '3':
        pass

    train_data_list = train_data_list_tpl.format(patch_size)
    valid_data_list = valid_data_list_tpl.format(patch_size)
    (data_batch, handle_placeholder, train_handle, valid_handle,
     valid_iterator) = data_loader.get_train_and_valid_data_batch(
         sess,
         train_data_list,
         valid_data_list,
         batch_size,
         flip_ud=False,
         flip_lr=False,
         rot_90=False)
    # print(sess.run(data_batch))
    # return

    # Suppress summary info while building the graph.
    logging.getLogger().setLevel(logging.WARNING)

    output = encoder(data_batch, patch_size, quan_scale)
    output = decoder(output, quan_scale)
    loss_op = get_loss(data_batch, output, bitrate_reg_decay)

    boundaries = config['boundaries']
    lr_values = config['lr_values']
    if lr_and_bound != 'None':
        # Override the config schedule from the command line:
        # 'start_lr,boundary1,boundary2,...'
        start_lr = float(lr_and_bound.split(',')[0])
        bound = lr_and_bound.split(',')[1:]
        boundaries = [int(item) for item in bound]
        lr_values = [start_lr, start_lr / 10, start_lr / 100]
        print('boundaries: {}, lr_values: {}'.format(boundaries, lr_values))
    train_op, global_step_op, learning_rate_op = optimize(
        loss_op, boundaries, lr_values)

    saver = tf.train.Saver()
    if load_ckpt == 'on':
        saver.restore(sess, model_param_path)
        logging.info('Load previous params')
    else:
        variable_init = tf.global_variables_initializer()
        sess.run(variable_init)
    # saver.save(sess, model_param_path)
    # logging.info('Model parameters saved to: {}'.format(model_param_path))
    # return

    utils.add_trainable_variables_to_summary()
    if args.summary_save == 'on':
        summary_writer = tf.summary.FileWriter(summary_path, sess.graph)
    merged_summaries = tf.summary.merge_all()

    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    many_runs_timeline = utils.TimeLiner()

    # Restore normal logging verbosity.
    logging.getLogger().setLevel(logging.INFO)

    logging.info('-----')
    logging.info(args)
    logging.info(config)
    logging.info('-----')

    # Debug mode: build the graph and log the setup, but do not train.
    if args.debug_mode == 'on':
        logging.info('-----')
        logging.info('Debug mode')
        logging.info('-----')
        return

    train_loss_display_step = 200
    valid_loss_display_step = 20000

    if reset_step == 'on':
        assign_op = tf.assign(global_step_op, 0)
        sess.run(assign_op)
    global_step = sess.run(global_step_op)

    # Normal training loop, resuming from the restored global step.
    for step in range(global_step + 1, num_steps + 1):
        if step % train_loss_display_step == 0:
            if args.summary_save == 'on':
                _, loss, global_step, learning_rate_value, summary_value = sess.run(
                    [train_op, loss_op, global_step_op, learning_rate_op,
                     merged_summaries],
                    feed_dict={handle_placeholder: train_handle},
                    options=options,
                    run_metadata=run_metadata)
            else:
                _, loss, global_step, learning_rate_value = sess.run(
                    [train_op, loss_op, global_step_op, learning_rate_op],
                    feed_dict={handle_placeholder: train_handle},
                    options=options,
                    run_metadata=run_metadata)
            logging.info('Step: {:d}, loss: {:.8f}, lr: {:.8f}'.format(
                global_step, loss, learning_rate_value))

            if step % valid_loss_display_step == 0:
                sess.run(valid_iterator.initializer)
                [valid_loss] = sess.run(
                    [loss_op],
                    feed_dict={handle_placeholder: valid_handle},
                    options=options,
                    run_metadata=run_metadata)
                logging.info('Valid loss: {:.8f}'.format(valid_loss))
                if args.param_save == 'on':
                    saver.save(sess, model_param_path)
                    # logging.info('Model parameters saved to: {}'.format(model_param_path))
                if args.summary_save == 'on':
                    summary_writer.add_summary(
                        summary_value, global_step=global_step)
                    # logging.info('Summaries saved to: {}'.format(summary_path))
        else:
            _, loss, global_step = sess.run(
                [train_op, loss_op, global_step_op],
                feed_dict={handle_placeholder: train_handle},
                options=options,
                run_metadata=run_metadata)
        if args.timeline_save == 'on':
            many_runs_timeline.update_timeline(run_metadata.step_stats)
        # logging.info('{}_{}'.format(step, global_step))

    if args.timeline_save == 'on':
        many_runs_timeline.save(timeline_path)
        logging.info('Timeline saved to: {}'.format(timeline_path))
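# optimize() is defined elsewhere in this project; only its call signature is
# visible above. A minimal sketch of what that signature implies, assuming a
# piecewise-constant learning-rate schedule driven by the global step. The
# name optimize_sketch and the choice of AdamOptimizer are assumptions, not
# confirmed by this file.
def optimize_sketch(loss_op, boundaries, lr_values):
    global_step_op = tf.train.get_or_create_global_step()
    # lr_values[i] applies while boundaries[i - 1] < step <= boundaries[i],
    # so len(lr_values) must equal len(boundaries) + 1.
    learning_rate_op = tf.train.piecewise_constant(
        global_step_op, boundaries, lr_values)
    optimizer = tf.train.AdamOptimizer(learning_rate_op)
    # minimize() increments the global step each time train_op runs.
    train_op = optimizer.minimize(loss_op, global_step=global_step_op)
    return train_op, global_step_op, learning_rate_op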
# Variant of train() that builds separate training and evaluation graphs
# (is_training flags plus variable reuse), e.g. for batch normalization.
def train(sess, args):
    config_path = 'model_{}/config.json'.format(args.model_num)
    with open(config_path, 'r') as f:
        config = json.load(f)
    patch_size = config['patch_size']
    batch_size = config['batch_size']
    num_steps = config['num_steps']
    quan_scale = config['quan_scale']

    overload_log = args.overload_log
    load_ckpt = args.load_ckpt
    # Append to the existing log unless an overwrite is explicitly requested.
    if load_ckpt == 'on' or overload_log == 'off':
        utils.set_logger(log_path)
        logging.info('Not overload_log')
    else:
        utils.set_logger(log_path, mode='w')
        logging.info('Overload_log')

    global global_var
    additional_param = args.additional_param
    if additional_param == '0':
        pass
    elif additional_param == '1':
        pass
    elif additional_param == '2':
        pass
    elif additional_param == '3':
        pass

    (data_batch, handle_placeholder, train_handle, valid_handle,
     valid_iterator) = data_loader.get_train_and_valid_data_batch(
         sess,
         train_data_list,
         valid_data_list,
         batch_size,
         flip_ud=False,
         flip_lr=False,
         rot_90=False)
    # print(sess.run(data_batch))
    # return

    # Suppress summary info while building the graph.
    logging.getLogger().setLevel(logging.WARNING)

    # Training graph.
    output_train = encoder(data_batch, patch_size, quan_scale, is_training=True)
    output_train = decoder(output_train, quan_scale, is_training=True)
    loss_op_train = get_loss(data_batch, output_train)
    train_op, global_step_op, learning_rate_op = optimize(loss_op_train, config)

    # -----
    # Evaluation graph, sharing the training variables via reuse.
    output_eval = encoder(
        data_batch, patch_size, quan_scale, is_training=False, reuse=True)
    output_eval = decoder(output_eval, quan_scale, is_training=False, reuse=True)
    loss_op_eval = get_loss(data_batch, output_eval)
    # -----

    saver = tf.train.Saver()
    if load_ckpt == 'on':
        saver.restore(sess, model_param_path)
        logging.info('Load previous params')
    else:
        variable_init = tf.global_variables_initializer()
        sess.run(variable_init)
    # saver.save(sess, model_param_path)
    # logging.info('Model parameters saved to: {}'.format(model_param_path))
    # return

    utils.add_trainable_variables_to_summary()
    if args.summary_save == 'on':
        summary_writer = tf.summary.FileWriter(summary_path, sess.graph)
    merged_summaries = tf.summary.merge_all()

    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    many_runs_timeline = utils.TimeLiner()

    # Restore normal logging verbosity.
    logging.getLogger().setLevel(logging.INFO)

    logging.info('-----')
    logging.info(args)
    logging.info(config)
    logging.info('-----')

    # Debug mode: build the graph and log the setup, but do not train.
    if args.debug_mode == 'on':
        logging.info('-----')
        logging.info('Debug mode')
        logging.info('-----')
        return

    train_loss_display_step = 200
    valid_loss_display_step = 20000

    global_step = sess.run(global_step_op)

    # Normal training loop, resuming from the restored global step.
    for step in range(global_step + 1, num_steps + 1):
        if step % train_loss_display_step == 0:
            if args.summary_save == 'on':
                _, loss, global_step, learning_rate_value, summary_value = sess.run(
                    [train_op, loss_op_train, global_step_op, learning_rate_op,
                     merged_summaries],
                    feed_dict={handle_placeholder: train_handle},
                    options=options,
                    run_metadata=run_metadata)
            else:
                _, loss, global_step, learning_rate_value = sess.run(
                    [train_op, loss_op_train, global_step_op, learning_rate_op],
                    feed_dict={handle_placeholder: train_handle},
                    options=options,
                    run_metadata=run_metadata)
            logging.info('Step: {:d}, loss: {:.8f}, lr: {:.8f}'.format(
                global_step, loss, learning_rate_value))

            if step % valid_loss_display_step == 0:
                sess.run(valid_iterator.initializer)
                [valid_loss] = sess.run(
                    [loss_op_eval],
                    feed_dict={handle_placeholder: valid_handle},
                    options=options,
                    run_metadata=run_metadata)
                logging.info('Valid loss: {:.8f}'.format(valid_loss))
                if args.param_save == 'on':
                    saver.save(sess, model_param_path)
                    # logging.info('Model parameters saved to: {}'.format(model_param_path))
                if args.summary_save == 'on':
                    summary_writer.add_summary(
                        summary_value, global_step=global_step)
                    # logging.info('Summaries saved to: {}'.format(summary_path))
        else:
            _, loss, global_step = sess.run(
                [train_op, loss_op_train, global_step_op],
                feed_dict={handle_placeholder: train_handle},
                options=options,
                run_metadata=run_metadata)
        if args.timeline_save == 'on':
            many_runs_timeline.update_timeline(run_metadata.step_stats)
        # logging.info('{}_{}'.format(step, global_step))

    if args.timeline_save == 'on':
        many_runs_timeline.save(timeline_path)
        logging.info('Timeline saved to: {}'.format(timeline_path))
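# data_loader.get_train_and_valid_data_batch() is also defined elsewhere. A
# minimal sketch of the feedable-iterator pattern its return values imply
# (one handle placeholder, string handles to switch between the training and
# validation datasets, and a re-initializable validation iterator). The
# _sketch name and the TFRecord parsing are assumptions; the real loader also
# handles the flip_ud/flip_lr/rot_90 augmentation flags.
def get_train_and_valid_data_batch_sketch(sess, train_files, valid_files,
                                          batch_size):
    def make_dataset(files, repeat):
        dataset = tf.data.TFRecordDataset(files)
        # The real loader would map a parse/decode function over the records
        # here to produce image patches.
        dataset = dataset.batch(batch_size)
        if repeat:
            dataset = dataset.repeat()
        return dataset

    train_dataset = make_dataset(train_files, repeat=True)
    valid_dataset = make_dataset(valid_files, repeat=False)

    # One placeholder selects which iterator feeds the graph at run time.
    handle_placeholder = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle_placeholder, train_dataset.output_types,
        train_dataset.output_shapes)
    data_batch = iterator.get_next()

    train_iterator = train_dataset.make_one_shot_iterator()
    # The validation iterator is re-initialized before each evaluation pass
    # (sess.run(valid_iterator.initializer) in train()).
    valid_iterator = valid_dataset.make_initializable_iterator()
    train_handle = sess.run(train_iterator.string_handle())
    valid_handle = sess.run(valid_iterator.string_handle())
    return (data_batch, handle_placeholder, train_handle, valid_handle,
            valid_iterator)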