def main(model_config, train_config, track_config):
  """Build the inference-mode Siamese model and save a checkpoint of it.

  If the training directory already holds a checkpoint it is restored first;
  otherwise all variables are freshly initialized and `model.init_fn` is
  called when an embedding checkpoint file is configured. A checkpoint named
  'model.ckpt' is then written to the training directory.

  Args:
    model_config: Model configuration dict. Forwarded to `SiameseModel` and
      `save_cfgs`; `['embed_config']['embedding_checkpoint_file']` is read
      here.
    train_config: Training configuration dict. Reads 'train_dir', 'seed' and
      'max_checkpoints_to_keep'; also forwarded to `SiameseModel` and
      `save_cfgs`.
    track_config: Tracking configuration; only forwarded to `save_cfgs`.
  """
  # Create training directory
  train_dir = train_config['train_dir']
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info('Creating training directory: %s', train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the Tensorflow graph
  g = tf.Graph()
  with g.as_default():
    # Set fixed seed
    np.random.seed(train_config['seed'])
    tf.set_random_seed(train_config['seed'])

    # Build the model
    model = siamese_model.SiameseModel(model_config, train_config, mode='inference')
    model.build()

    # Save configurations for future reference
    save_cfgs(train_dir, model_config, train_config, track_config)

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=train_config['max_checkpoints_to_keep'])

    # Dynamically allocate GPU memory
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=sess_config)
    try:
      model_path = tf.train.latest_checkpoint(train_config['train_dir'])

      if not model_path:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        start_step = 0

        # Load pretrained embedding model if needed
        if model_config['embed_config']['embedding_checkpoint_file']:
          model.init_fn(sess)

      else:
        logging.info('Restore from last checkpoint: {}'.format(model_path))
        sess.run(tf.local_variables_initializer())
        saver.restore(sess, model_path)
        start_step = tf.train.global_step(sess, model.global_step.name) + 1

      checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
      saver.save(sess, checkpoint_path, global_step=start_step)
    finally:
      # Release the session's resources even if restore/save fails.
      sess.close()
Exemple #2
0
def main(model_config, train_config):
    """Train a BiseNet model using `configuration.num_gpus` GPU towers.

    Builds the training and validation models, averages the per-tower
    gradients, then runs the training loop with periodic logging, summary
    writing and checkpointing. Training resumes from the latest checkpoint
    found in the training directory, if any.

    Args:
        model_config: Model configuration dict. Forwarded to `BiseNet` and
            `save_cfgs`; `['frontend_config']['pretrained_dir']` is read here.
        train_config: Training configuration dict. Reads 'train_dir', 'seed',
            'num_classes', 'max_checkpoints_to_keep',
            'save_model_every_n_step' and 'train_data_config' (with keys
            'epoch', 'num_examples_per_epoch' and 'batch_size').
    """
    # Create training directory which will be used to save: configurations, model files, TensorBoard logs
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(train_config['seed'])
        np.random.seed(train_config['seed'])
        tf.set_random_seed(train_config['seed'])

        # Build the training and validation model
        model = BiseNet(model_config,
                        train_config,
                        train_config['num_classes'],
                        mode="train")
        model.build(num_gpus=configuration.num_gpus, reuse=tf.AUTO_REUSE)
        model_va = BiseNet(model_config,
                           train_config,
                           train_config['num_classes'],
                           mode="validation")
        model_va.build(reuse=True)

        # Save configurations for future reference
        save_cfgs(train_dir, model_config, train_config)

        learning_rate = _configure_learning_rate(train_config,
                                                 model.global_step)
        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops: one gradient computation per GPU tower
        tower_grads = []
        for i in range(configuration.num_gpus):
            with tf.device('/gpu:%d' % i):
                # The first tower uses the root name scope, the others 'clone_<i>'
                name_scope = ('clone_%d' % i) if i else ''
                with tf.name_scope(name_scope):
                    grads = optimizer.compute_gradients(model.total_loss[i])
                    tower_grads.append(grads)
        with tf.device('/cpu:0'):
            # Make the gradient update depend on the collected UPDATE_OPS
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_n_vars = _average_gradients(tower_grads)
                grad_updates = optimizer.apply_gradients(
                    grads_n_vars, global_step=model.global_step)
            # Collapse the per-tower losses into a single scalar for reporting
            model.total_loss = tf.reduce_mean(model.total_loss)

        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])

        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()
        g.finalize()  # Finalize graph to avoid adding ops by mistake

        # for multi gpu options. 'allow_soft_placement' must be set true
        sess_config = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False)
        # allow_growth is explicitly turned off in this variant
        sess_config.gpu_options.allow_growth = False

        sess = tf.Session(config=sess_config)
        try:
            model_path = tf.train.latest_checkpoint(train_config['train_dir'])

            if not model_path:
                sess.run(global_variables_init_op)
                sess.run(local_variables_init_op)
                start_step = 0

                if model_config['frontend_config'][
                        'pretrained_dir'] and model.init_fn:
                    model.init_fn(sess)
            else:
                logging.info(
                    'Restore from last checkpoint: {}'.format(model_path))
                sess.run(local_variables_init_op)
                saver.restore(sess, model_path)
                # NOTE(review): checkpoints are written under the loop index
                # `step`; if the global step is incremented once per
                # iteration, resuming at global_step + 1 skips one loop
                # index -- confirm this is intended.
                start_step = tf.train.global_step(
                    sess, model.global_step.name) + 1

            # Training loop
            data_config = train_config['train_data_config']
            total_steps = int(data_config['epoch'] *
                              data_config['num_examples_per_epoch'] /
                              data_config['batch_size'])
            logging.info('Train for {} steps'.format(total_steps))
            for step in range(start_step, total_steps):
                start_time = time.time()
                _, loss = sess.run([grad_updates, model.total_loss])
                duration = time.time() - start_time

                if step % 10 == 0:
                    examples_per_sec = data_config['batch_size'] / float(
                        duration)
                    time_remain = data_config['batch_size'] * (
                        total_steps - step) / examples_per_sec
                    m, s = divmod(time_remain, 60)
                    h, m = divmod(m, 60)
                    format_str = (
                        '%s: step %d, total loss = %.2f, (%.1f examples/sec; %.3f '
                        'sec/batch; %dh:%02dm:%02ds remains)')
                    logging.info(format_str %
                                 (datetime.now(), step, loss, examples_per_sec,
                                  duration, h, m, s))

                    # Summaries are written at the same cadence as the log line
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % train_config['save_model_every_n_step'] == 0 or (
                        step + 1) == total_steps:
                    checkpoint_path = osp.join(train_config['train_dir'],
                                               'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            # Flush buffered summaries and release the session even when
            # training is interrupted or fails.
            summary_writer.close()
            sess.close()
def main(model_config, train_config, track_config):
    """Train the Siamese model on an automatically selected GPU.

    Builds the training and validation models, sets up the optimizer via
    `tf.contrib.layers.optimize_loss`, then runs the training loop with
    periodic logging, summary writing and checkpointing. Training resumes
    from the latest checkpoint found in the training directory, if any.

    Args:
        model_config: Model configuration dict. Forwarded to `SiameseModel`
            and `save_cfgs`;
            `['embed_config']['embedding_checkpoint_file']` is read here.
        train_config: Training configuration dict. Reads 'train_dir', 'seed',
            'clip_gradients', 'max_checkpoints_to_keep',
            'save_model_every_n_step' and 'train_data_config' (with keys
            'epoch', 'num_examples_per_epoch' and 'batch_size').
        track_config: Tracking configuration; only forwarded to `save_cfgs`.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

    # Create training directory which will be used to save: configurations, model files, TensorBoard logs
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(train_config['seed'])
        np.random.seed(train_config['seed'])
        tf.set_random_seed(train_config['seed'])

        # Build the training and validation model
        model = siamese_model.SiameseModel(model_config,
                                           train_config,
                                           mode='train')
        model.build()
        model_va = siamese_model.SiameseModel(model_config,
                                              train_config,
                                              mode='validation')
        model_va.build(reuse=True)

        # Save configurations for future reference
        save_cfgs(train_dir, model_config, train_config, track_config)

        learning_rate = _configure_learning_rate(train_config,
                                                 model.global_step)
        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops
        opt_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=optimizer,
            clip_gradients=train_config['clip_gradients'],
            learning_rate_decay_fn=None,
            summaries=['learning_rate'])

        # `train_op` is a no-op that only runs after the optimizer step
        with tf.control_dependencies([opt_op]):
            train_op = tf.no_op(name='train')

        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])

        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()
        g.finalize()  # Finalize graph to avoid adding ops by mistake

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)

        sess = tf.Session(config=sess_config)
        try:
            model_path = tf.train.latest_checkpoint(train_config['train_dir'])

            if not model_path:
                sess.run(global_variables_init_op)
                sess.run(local_variables_init_op)
                start_step = 0

                if model_config['embed_config']['embedding_checkpoint_file']:
                    model.init_fn(sess)
            else:
                logging.info(
                    'Restore from last checkpoint: {}'.format(model_path))
                sess.run(local_variables_init_op)
                saver.restore(sess, model_path)
                # NOTE(review): checkpoints are written under the loop index
                # `step`; if the global step is incremented once per
                # iteration, resuming at global_step + 1 skips one loop
                # index -- confirm this is intended.
                start_step = tf.train.global_step(
                    sess, model.global_step.name) + 1

            # Training loop
            data_config = train_config['train_data_config']
            total_steps = int(data_config['epoch'] *
                              data_config['num_examples_per_epoch'] /
                              data_config['batch_size'])
            logging.info('Train for {} steps'.format(total_steps))
            for step in range(start_step, total_steps):
                start_time = time.time()
                _, loss, batch_loss = sess.run(
                    [train_op, model.total_loss, model.batch_loss])
                duration = time.time() - start_time

                if step % 10 == 0:
                    examples_per_sec = data_config['batch_size'] / float(
                        duration)
                    time_remain = data_config['batch_size'] * (
                        total_steps - step) / examples_per_sec
                    m, s = divmod(time_remain, 60)
                    h, m = divmod(m, 60)
                    format_str = (
                        '%s: step %d, total loss = %.2f, batch loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch; %dh:%02dm:%02ds remains)')
                    logging.info(format_str %
                                 (datetime.now(), step, loss, batch_loss,
                                  examples_per_sec, duration, h, m, s))

                if step % 100 == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % train_config['save_model_every_n_step'] == 0 or (
                        step + 1) == total_steps:
                    checkpoint_path = osp.join(train_config['train_dir'],
                                               'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            # Flush buffered summaries and release the session even when
            # training is interrupted or fails.
            summary_writer.close()
            sess.close()
Exemple #4
0
def main(model_config, train_config, track_config):
  """Train (or export) the Siamese model using configs stored in the train dir.

  On the first run, when `have_cfgs(train_dir)` is false, the given
  configurations are saved to the training directory and the function
  returns so they can be edited. On later runs the configurations are loaded
  from that directory and replace the arguments. The model is then built,
  restored from the latest checkpoint if one exists, and either the export
  branch returns immediately or the training loop runs.

  Args:
    model_config: Model configuration dict;
      `['embed_config']['embedding_checkpoint_file']` is read here.
    train_config: Training configuration dict. Reads 'train_dir', 'seed',
      'clip_gradients', 'max_checkpoints_to_keep', 'export' and
      'train_data_config' (with keys 'epoch', 'num_examples_per_epoch' and
      'batch_size').
    track_config: Tracking configuration; forwarded to `SiameseModel` and
      `save_cfgs`.
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

  # Create training directory which will be used to save: configurations, model files, TensorBoard logs
  train_dir = train_config['train_dir']
  if not osp.isdir(train_dir):
    logging.info('Creating training directory: %s', train_dir)
    mkdir_p(train_dir)

  if have_cfgs(train_dir):
    model_config, train_config, track_config = load_cfgs(train_dir)
    print("=================== load cfg ")
  else:
    save_cfgs(train_dir, model_config, train_config, track_config)
    print("=================== save default cfg, please modify files in {}".format(train_dir))
    return

  g = tf.Graph()
  with g.as_default():
    # Set fixed seed for reproducible experiments
    random.seed(train_config['seed'])
    np.random.seed(train_config['seed'])
    tf.set_random_seed(train_config['seed'])

    # Build the training and validation model
    model = siamese_model.SiameseModel(model_config, train_config, track_config, mode='train')
    model.build()
    model_va = siamese_model.SiameseModel(model_config, train_config, track_config, mode='validation')
    model_va.build(reuse=True)

    learning_rate = _configure_learning_rate(train_config, model.global_step)
    optimizer = _configure_optimizer(train_config, learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    # general way for run train: https://qiita.com/horiem/items/00ec6488b23895cc4fe2
    # tensorflow 2.1: https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough
    # Set up the training ops
    # NOTE(review): this call goes through the `tensorflow` name rather than
    # the `tf` alias used elsewhere -- presumably both are imported; confirm.
    opt_op = tensorflow.contrib.layers.optimize_loss(
      loss=model.total_loss,
      global_step=model.global_step,
      learning_rate=learning_rate,
      optimizer=optimizer,
      clip_gradients=train_config['clip_gradients'],
      learning_rate_decay_fn=None,
      summaries=['learning_rate'])

    # `train_op` is a no-op that only runs after the optimizer step
    with tf.control_dependencies([opt_op]):
      train_op = tf.no_op(name='train')

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=train_config['max_checkpoints_to_keep'])

    summary_writer = tf.summary.FileWriter(train_dir, g)
    summary_op = tf.summary.merge_all()

    global_variables_init_op = tf.global_variables_initializer()
    local_variables_init_op = tf.local_variables_initializer()
    g.finalize()  # Finalize graph to avoid adding ops by mistake

    # Dynamically allocate GPU memory
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=sess_config)
    try:
      model_path = tf.train.latest_checkpoint(train_config['train_dir'])

      if not model_path:
        sess.run(global_variables_init_op)
        sess.run(local_variables_init_op)
        start_step = 0

        if model_config['embed_config']['embedding_checkpoint_file']:
          model.init_fn(sess)
      else:
        logging.info('Restore from last checkpoint: {}'.format(model_path))
        sess.run(local_variables_init_op)
        saver.restore(sess, model_path)
        # NOTE(review): checkpoints are written under the loop index `step`;
        # if the global step is incremented once per iteration, resuming at
        # global_step + 1 skips one loop index -- confirm this is intended.
        start_step = tf.train.global_step(sess, model.global_step.name) + 1

      # export
      if train_config["export"]:
        # still debugging: the draft export code is kept in the inert string
        # below, so this branch currently just returns.
        '''
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(sess, tf.get_default_graph().as_graph_def(), ["train/detection/add"])
        frozen_graph = tf.Graph()
        with frozen_graph.as_default():
          tf.import_graph_def(frozen_graph_def)
          save_model_dir = osp.join(train_config['train_dir'], 'models')
          tf.train.write_graph(frozen_graph_def, save_model_dir, 'quantized_frozen_graph.pb', as_text=False)
          tf.train.write_graph(frozen_graph_def, save_model_dir, 'quantized_frozen_graph.pbtxt', as_text=True)

          output_op = sess.graph.get_tensor_by_name("validation/detection/add:0")
          input1_op = sess.graph.get_tensor_by_name("validation/template_image:0")
          input2_op = sess.graph.get_tensor_by_name("validation/input_image:0")

          converter = tf.lite.TFLiteConverter.from_session(sess, [input1_op, input2_op], [output_op])
          converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
          input_arrays = converter.get_input_arrays()
          converter.quantized_input_stats = {input_arrays[0] : (0., 1.), input_arrays[1] : (0., 1.)}  # mean, std_dev
          converter.default_ranges_stats = (0, 255)
          tflite_model = converter.convert()
          open(osp.join(save_model_dir, 'quantized_frozen_graph.tflite'), "wb").write(tflite_model)
        '''
        return

      # Training loop
      data_config = train_config['train_data_config']
      total_steps = int(data_config['epoch'] *
                        data_config['num_examples_per_epoch'] /
                        data_config['batch_size'])
      logging.info('Train for {} steps'.format(total_steps))
      # Checkpoint once per epoch. Clamp to at least 1 so that a batch size
      # larger than the epoch size cannot make `step % save_step` divide by zero.
      save_step = max(1, int(data_config['num_examples_per_epoch'] / data_config['batch_size']))
      print("=========== save_step: {}".format(save_step))
      for step in range(start_step, total_steps):
        start_time = time.time()
        # no "feed_dict"
        # has "feed_dict" example (mnist): https://qiita.com/SwitchBlade/items/6677c283b2402d060cd0
        # `instances` and `response` are fetched but not used below.
        _, loss, batch_loss, instances, response = sess.run([train_op, model.total_loss, model.batch_loss, model.instances, model.response])
        duration = time.time() - start_time
        if step % 10 == 0:
          examples_per_sec = data_config['batch_size'] / float(duration)
          time_remain = data_config['batch_size'] * (total_steps - step) / examples_per_sec
          m, s = divmod(time_remain, 60)
          h, m = divmod(m, 60)
          format_str = ('%s: step %d, total loss = %.2f, batch loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch; %dh:%02dm:%02ds remains)')
          logging.info(format_str % (datetime.now(), step, loss, batch_loss,
                                     examples_per_sec, duration, h, m, s))

        if step % 100 == 0:
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, step)

        if step % save_step == 0 or (step + 1) == total_steps:
          checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
          saver.save(sess, checkpoint_path, global_step=step)
    finally:
      # Flush buffered summaries and release the session on every exit path
      # (export return, normal completion, or failure).
      summary_writer.close()
      sess.close()
def main(model_config, train_config, track_config):
  """Train the Siamese model on an automatically selected GPU.

  Builds the training and validation models, sets up the optimizer via
  `tf.contrib.layers.optimize_loss`, then runs the training loop with
  periodic logging, summary writing and checkpointing. Training resumes from
  the latest checkpoint found in the training directory, if any.

  Args:
    model_config: Model configuration dict. Forwarded to `SiameseModel` and
      `save_cfgs`; `['embed_config']['embedding_checkpoint_file']` is read
      here.
    train_config: Training configuration dict. Reads 'train_dir', 'seed',
      'clip_gradients', 'max_checkpoints_to_keep', 'save_model_every_n_step'
      and 'train_data_config' (with keys 'epoch', 'num_examples_per_epoch'
      and 'batch_size').
    track_config: Tracking configuration; only forwarded to `save_cfgs`.
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

  # Create training directory which will be used to save: configurations, model files, TensorBoard logs
  train_dir = train_config['train_dir']
  if not osp.isdir(train_dir):
    logging.info('Creating training directory: %s', train_dir)
    mkdir_p(train_dir)

  g = tf.Graph()
  with g.as_default():
    # Set fixed seed for reproducible experiments
    random.seed(train_config['seed'])
    np.random.seed(train_config['seed'])
    tf.set_random_seed(train_config['seed'])

    # Build the training and validation model
    model = siamese_model.SiameseModel(model_config, train_config, mode='train')
    model.build()
    model_va = siamese_model.SiameseModel(model_config, train_config, mode='validation')
    model_va.build(reuse=True)

    # Save configurations for future reference
    save_cfgs(train_dir, model_config, train_config, track_config)

    learning_rate = _configure_learning_rate(train_config, model.global_step)
    optimizer = _configure_optimizer(train_config, learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    # Set up the training ops
    opt_op = tf.contrib.layers.optimize_loss(
      loss=model.total_loss,
      global_step=model.global_step,
      learning_rate=learning_rate,
      optimizer=optimizer,
      clip_gradients=train_config['clip_gradients'],
      learning_rate_decay_fn=None,
      summaries=['learning_rate'])

    # `train_op` is a no-op that only runs after the optimizer step
    with tf.control_dependencies([opt_op]):
      train_op = tf.no_op(name='train')

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=train_config['max_checkpoints_to_keep'])

    summary_writer = tf.summary.FileWriter(train_dir, g)
    summary_op = tf.summary.merge_all()

    global_variables_init_op = tf.global_variables_initializer()
    local_variables_init_op = tf.local_variables_initializer()
    g.finalize()  # Finalize graph to avoid adding ops by mistake

    # Dynamically allocate GPU memory
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=sess_config)
    try:
      model_path = tf.train.latest_checkpoint(train_config['train_dir'])

      if not model_path:
        sess.run(global_variables_init_op)
        sess.run(local_variables_init_op)
        start_step = 0

        if model_config['embed_config']['embedding_checkpoint_file']:
          model.init_fn(sess)
      else:
        logging.info('Restore from last checkpoint: {}'.format(model_path))
        sess.run(local_variables_init_op)
        saver.restore(sess, model_path)
        # NOTE(review): checkpoints are written under the loop index `step`;
        # if the global step is incremented once per iteration, resuming at
        # global_step + 1 skips one loop index -- confirm this is intended.
        start_step = tf.train.global_step(sess, model.global_step.name) + 1

      # Training loop
      data_config = train_config['train_data_config']
      total_steps = int(data_config['epoch'] *
                        data_config['num_examples_per_epoch'] /
                        data_config['batch_size'])
      logging.info('Train for {} steps'.format(total_steps))
      for step in range(start_step, total_steps):
        start_time = time.time()
        _, loss, batch_loss = sess.run([train_op, model.total_loss, model.batch_loss])
        duration = time.time() - start_time

        if step % 10 == 0:
          examples_per_sec = data_config['batch_size'] / float(duration)
          time_remain = data_config['batch_size'] * (total_steps - step) / examples_per_sec
          m, s = divmod(time_remain, 60)
          h, m = divmod(m, 60)
          format_str = ('%s: step %d, total loss = %.2f, batch loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch; %dh:%02dm:%02ds remains)')
          logging.info(format_str % (datetime.now(), step, loss, batch_loss,
                                     examples_per_sec, duration, h, m, s))

        if step % 100 == 0:
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, step)

        if step % train_config['save_model_every_n_step'] == 0 or (step + 1) == total_steps:
          checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
          saver.save(sess, checkpoint_path, global_step=step)
    finally:
      # Flush buffered summaries and release the session even when training
      # is interrupted or fails.
      summary_writer.close()
      sess.close()
Exemple #6
0
def main(model_config, train_config, track_config):
    """Train a model on one or several GPUs using per-GPU towers.

    The training batch is split across the GPUs, gradients are computed per
    tower, averaged, clipped by global norm and applied. A validation model
    is built on GPU 0. Variables are initialized and then optionally restored
    from the latest checkpoint in the training directory, or from a
    finetuning checkpoint with the global step reset to 0.

    Args:
        model_config: Model configuration dict. Reads 'Model',
            'finetuned_checkpoint_file' and
            `['embed_config']['embedding_checkpoint_file']`; also forwarded
            to the model constructors and `save_cfgs`.
        train_config: Training configuration dict. Reads 'train_dir', 'seed',
            'lr_config', 'clip_gradients', 'max_checkpoints_to_keep',
            'save_model_every_n_step', 'validation_data_config' and
            'train_data_config' (with keys 'epoch',
            'num_examples_per_epoch', 'batch_size' and the optional
            'gpu_ids' and 'time_decay').
        track_config: Tracking configuration; only forwarded to `save_cfgs`.
    """

    # GPU Config
    gpu_list = train_config['train_data_config'].get('gpu_ids', '0')
    num_gpus = len(gpu_list.split(','))
    if num_gpus > 1:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

    # Create training directory which will be used to save: configurations, model files, TensorBoard logs
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(train_config['seed'])
        np.random.seed(train_config['seed'])
        tf.set_random_seed(train_config['seed'])

        # Build global step
        with tf.name_scope('train/'):
            global_step = tf.Variable(initial_value=0,
                                      name='global_step',
                                      trainable=False,
                                      collections=[
                                          tf.GraphKeys.GLOBAL_STEP,
                                          tf.GraphKeys.GLOBAL_VARIABLES
                                      ])

        Model = get_model(model_config['Model'])

        # build training dataloader and validation dataloader
        # ---train
        train_dataloader = DataLoader(train_config['train_data_config'],
                                      is_training=True)
        train_dataloader.build()
        train_inputs = train_dataloader.get_one_batch()

        # ---validation
        val_dataloader = DataLoader(train_config['validation_data_config'],
                                    is_training=False)
        val_dataloader.build()
        val_inputs = val_dataloader.get_one_batch()

        # Save configurations for future reference
        save_cfgs(train_dir, model_config, train_config, track_config)

        if train_config['lr_config'].get('lr_warmup', False):
            # Linear warm-up from init_lr_ratio * initial_lr up to initial_lr
            # over the first `warmup_steps` global steps, then the configured
            # schedule shifted by `warmup_steps`.
            warmup_epoch_num = 10
            init_lr_ratio = 0.8
            warmup_steps = warmup_epoch_num * int(
                train_config['train_data_config']['num_examples_per_epoch']
            ) // train_config['train_data_config']['batch_size']
            inc_per_step = (
                1 - init_lr_ratio
            ) * train_config['lr_config']['initial_lr'] / warmup_steps
            warmup_lr = train_config['lr_config'][
                'initial_lr'] * init_lr_ratio + inc_per_step * tf.to_float(
                    global_step)
            learning_rate = tf.cond(
                tf.less(global_step,
                        warmup_steps), lambda: tf.identity(warmup_lr),
                lambda: _configure_learning_rate(train_config, global_step -
                                                 warmup_steps))
        else:
            learning_rate = _configure_learning_rate(train_config, global_step)

        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops: split every input tensor into one slice per GPU
        examplars = tf.split(train_inputs[0], num_gpus)
        instances = tf.split(train_inputs[1], num_gpus)
        gt_examplar_boxes = tf.split(train_inputs[2], num_gpus)
        gt_instance_boxes = tf.split(train_inputs[3], num_gpus)

        if train_config['train_data_config'].get('time_decay', False):
            time_intervals = tf.split(train_inputs[4], num_gpus)

        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpus):
                with tf.device('/gpu:%d' % i):
                    if train_config['train_data_config'].get(
                            'time_decay', False):
                        inputs = [
                            examplars[i], instances[i], gt_examplar_boxes[i],
                            gt_instance_boxes[i], time_intervals[i]
                        ]
                    else:
                        inputs = [
                            examplars[i], instances[i], gt_examplar_boxes[i],
                            gt_instance_boxes[i]
                        ]
                    model = tower_model(Model,
                                        inputs,
                                        model_config,
                                        train_config,
                                        mode='train')
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    grads = optimizer.compute_gradients(model.total_loss)
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads)

        # Clip gradient
        gradients, tvars = zip(*grads)
        clip_gradients, _ = tf.clip_by_global_norm(
            gradients, train_config['clip_gradients'])
        train_op = optimizer.apply_gradients(zip(clip_gradients, tvars),
                                             global_step=global_step)

        # Build validation model
        with tf.device('/gpu:0'):
            model_va = Model(model_config,
                             train_config,
                             mode='validation',
                             inputs=val_inputs)
            model_va.build(reuse=True)

        # Save Model setup
        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])

        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options,
                                     allow_soft_placement=True)

        # Debug timeline: collect full run traces when the `Debug` flag is set
        if Debug:
            from tensorflow.python.client import timeline
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        sess = tf.Session(config=sess_config)
        model_path = tf.train.latest_checkpoint(train_config['train_dir'])

        if not model_path:
            sess.run(global_variables_init_op)
            sess.run(local_variables_init_op)
            # This value is overwritten unconditionally after the if/else
            start_step = 0
            if model_config['embed_config']['embedding_checkpoint_file']:
                # `model` is the tower built in the last loop iteration above
                model.init_fn(sess)
            elif model_config['finetuned_checkpoint_file']:
                finetuned_checkpoint_file = tf.train.latest_checkpoint(
                    model_config['finetuned_checkpoint_file'])
                logging.info('Restore from last checkpoint: {}'.format(
                    finetuned_checkpoint_file))
                sess.run(local_variables_init_op)
                sess.run(global_variables_init_op)
                restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
                    finetuned_checkpoint_file,
                    tf.global_variables(),
                    ignore_missing_vars=True)
                restore_op(sess)
                # reset global step saved in checkpoint
                global_step_reset_op = global_step.assign(0)
                sess.run(global_step_reset_op)
        else:
            logging.info('Restore from last checkpoint: {}'.format(model_path))
            sess.run(local_variables_init_op)
            sess.run(global_variables_init_op)
            # Restore via slim so that variables missing from the checkpoint
            # are tolerated (ignore_missing_vars=True)
            restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
                model_path, tf.global_variables(), ignore_missing_vars=True)
            restore_op(sess)

        # The loop index starts one past the current global step in every
        # case, so a fresh run starts at step 1.
        start_step = tf.train.global_step(sess, global_step.name) + 1
        print_trainable(sess)  # helper function, can be disabled
        g.finalize()  # Finalize graph to avoid adding ops by mistake

        # Training loop
        data_config = train_config['train_data_config']
        total_steps = int(data_config['epoch'] *
                          data_config['num_examples_per_epoch'] /
                          data_config['batch_size'])
        logging.info('Train for {} steps'.format(total_steps))
        for step in range(start_step, total_steps):
            try:
                start_time = time.time()
                if Debug:
                    _, loss, batch_loss, current_lr = sess.run(
                        [
                            train_op, model.total_loss, model.batch_loss,
                            learning_rate
                        ],
                        run_metadata=run_metadata,
                        options=run_options)
                    # Overwrite timeline.json with the trace of this step
                    t1 = timeline.Timeline(run_metadata.step_stats)
                    ctf = t1.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)
                else:
                    _, loss, batch_loss, current_lr = sess.run([
                        train_op, model.total_loss, model.batch_loss,
                        learning_rate
                    ])
                duration = time.time() - start_time

                if step % 10 == 0:
                    examples_per_sec = data_config['batch_size'] / float(
                        duration)
                    time_remain = data_config['batch_size'] * (
                        total_steps - step) / examples_per_sec
                    current_epoch = (
                        step * data_config['batch_size']
                    ) // data_config['num_examples_per_epoch'] + 1
                    m, s = divmod(time_remain, 60)
                    h, m = divmod(m, 60)
                    format_str = (
                        '%s: epoch %d-step %d,lr = %f, total loss = %.3f, batch loss = %.3f (%.1f examples/sec; %.3f '
                        'sec/batch; %dh:%02dm:%02ds remains)')
                    logging.info(
                        format_str %
                        (datetime.now(), current_epoch, step, current_lr, loss,
                         batch_loss, examples_per_sec, duration, h, m, s))

                if step % 200 == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % train_config['save_model_every_n_step'] == 0 or (
                        step + 1) == total_steps:
                    checkpoint_path = osp.join(train_config['train_dir'],
                                               'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
            except KeyboardInterrupt:
                # Save a checkpoint before stopping on Ctrl-C
                checkpoint_path = osp.join(train_config['train_dir'],
                                           'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("save model.ckpt-%d" % (step))
                break
            except Exception:
                # Best effort: report the failure and move on to the next
                # step. Only `Exception` is caught, so SystemExit and
                # GeneratorExit are no longer swallowed.
                print(traceback.format_exc())
                print("Error found in current step, continue")