def train(model_name=None, hparams=None, train_csv_path=None, train_clip_dir=None,
          class_map_path=None, train_dir=None, sample_rate=None):
  """Runs the training loop."""
  print('\nTraining model:{} with hparams:{} and class map:{}'.format(model_name, hparams, class_map_path))
  print('Training data: clip dir {} and labels {}'.format(train_clip_dir, train_csv_path))
  print('Training dir {}\n'.format(train_dir))

  with tf.Graph().as_default():
    # Create the input pipeline.
    features, labels, num_classes, input_init = inputs.train_input(
        train_csv_path=train_csv_path, train_clip_dir=train_clip_dir, class_map_path=class_map_path,
        hparams=hparams, sample_rate=sample_rate)
    # Create the model in training mode.
    global_step, prediction, loss_tensor, train_op = model.define_model(
        model_name=model_name, features=features, labels=labels, num_classes=num_classes,
        hparams=hparams, training=True)

    # Define our own checkpoint saving hook, instead of using the built-in one,
    # so that we can specify additional checkpoint retention settings.
    saver = tf.train.Saver(
        max_to_keep=30, keep_checkpoint_every_n_hours=0.25)
    saver_hook = tf.train.CheckpointSaverHook(
        save_steps=250, checkpoint_dir=train_dir, saver=saver)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(
        save_steps=50, output_dir=train_dir, summary_op=summary_op)

    with tf.train.SingularMonitoredSession(hooks=[saver_hook, summary_hook],
                                           checkpoint_dir=train_dir) as sess:
      sess.raw_session().run(input_init)
      while not sess.should_stop():
        step, _, pred, loss = sess.run([global_step, train_op, prediction, loss_tensor])
        print(step, loss)
        sys.stdout.flush()
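A hypothetical invocation of the train() function above; the model name, hparams values, and paths are illustrative placeholders, not values from the source repository.

if __name__ == '__main__':
  # All values below are illustrative assumptions.
  hparams = tf.contrib.training.HParams(batch_size=64, lr=1e-4)
  train(model_name='cnn',                     # hypothetical model name
        hparams=hparams,
        train_csv_path='/data/train.csv',     # hypothetical paths
        train_clip_dir='/data/audio_clips',
        class_map_path='/data/class_map.csv',
        train_dir='/tmp/train_dir',
        sample_rate=44100)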
Example 2
def train(model_name=None,
          hparams=None,
          train_csv_path=None,
          train_clip_dir=None,
          class_map_path=None,
          train_dir=None,
          sample_rate=None):
    """Runs the training loop."""
    print('\nTraining model:{} with hparams:{} and class map:{}'.format(
        model_name, hparams, class_map_path))
    print('Training data: clip dir {} and labels {}'.format(
        train_clip_dir, train_csv_path))
    print('Training dir {}\n'.format(train_dir))

    with tf.Graph().as_default():
        # Create the input pipeline.
        features, labels, num_classes, input_init = inputs.train_input(
            train_csv_path=train_csv_path,
            train_clip_dir=train_clip_dir,
            class_map_path=class_map_path,
            hparams=hparams,
            sample_rate=sample_rate)
        # Create the model in training mode.
        global_step, prediction, loss_tensor, train_op = model.define_model(
            model_name=model_name,
            features=features,
            labels=labels,
            num_classes=num_classes,
            hparams=hparams,
            training=True)

        # Define our own checkpoint saving hook, instead of using the built-in one,
        # so that we can specify additional checkpoint retention settings.
        saver = tf.train.Saver(max_to_keep=30,
                               keep_checkpoint_every_n_hours=0.25)
        saver_hook = tf.train.CheckpointSaverHook(save_steps=250,
                                                  checkpoint_dir=train_dir,
                                                  saver=saver)

        summary_op = tf.summary.merge_all()
        summary_hook = tf.train.SummarySaverHook(save_steps=50,
                                                 output_dir=train_dir,
                                                 summary_op=summary_op)

        with tf.train.SingularMonitoredSession(
                hooks=[saver_hook, summary_hook],
                checkpoint_dir=train_dir) as sess:
            sess.raw_session().run(input_init)
            while not sess.should_stop():
                step, _, pred, loss = sess.run(
                    [global_step, train_op, prediction, loss_tensor])
                print(step, loss)
                sys.stdout.flush()
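The inputs.train_input() helper is not shown in this listing; judging only from its return values (features, labels, num_classes, input_init) and the sess.raw_session().run(input_init) call, it likely builds a tf.data pipeline with an initializable iterator. A minimal sketch under those assumptions (the CSV layout and the record_to_example() helper are hypothetical):

def train_input_sketch(train_csv_path, train_clip_dir, class_map_path,
                       hparams, sample_rate):
    """Hypothetical sketch of a train_input()-style pipeline."""
    # Assume the class map CSV has one "index,label" row per class.
    with open(class_map_path) as f:
        num_classes = sum(1 for _ in f)
    dataset = tf.data.TextLineDataset(train_csv_path).skip(1)  # skip header (assumption)
    # record_to_example() is a hypothetical helper that reads the clip from
    # train_clip_dir and returns (features, one_hot_labels) tensors.
    dataset = dataset.map(
        lambda record: record_to_example(record, train_clip_dir, hparams, sample_rate))
    dataset = dataset.shuffle(10000).repeat().batch(hparams.batch_size)
    iterator = dataset.make_initializable_iterator()
    features, labels = iterator.get_next()
    # The iterator initializer is what train() runs once as input_init.
    return features, labels, num_classes, iterator.initializer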
Example 3
def train(model_name=None,
          hparams=None,
          class_map_path=None,
          train_csv_path=None,
          train_clip_dir=None,
          train_dir=None,
          epoch_batches=None,
          warmstart_checkpoint=None,
          warmstart_include_scopes=None,
          warmstart_exclude_scopes=None):
    """Runs the training loop."""
    print('\nTraining model:{} with hparams:{} and class map:{}'.format(
        model_name, hparams, class_map_path))
    print('Training data: clip dir {} and labels {}'.format(
        train_clip_dir, train_csv_path))
    print('Training dir {}\n'.format(train_dir))

    with tf.Graph().as_default():
        # Create the input pipeline.
        features, labels, num_classes, input_init = inputs.train_input(
            train_csv_path=train_csv_path,
            train_clip_dir=train_clip_dir,
            class_map_path=class_map_path,
            hparams=hparams)
        # Create the model in training mode.
        global_step, prediction, loss_tensor, train_op = model.define_model(
            model_name=model_name,
            features=features,
            labels=labels,
            num_classes=num_classes,
            hparams=hparams,
            epoch_batches=epoch_batches,
            training=True)

        # Define our own checkpoint saving hook, instead of using the built-in one,
        # so that we can specify additional checkpoint retention settings.
        saver = tf.train.Saver(max_to_keep=10000,
                               keep_checkpoint_every_n_hours=0.25)
        saver_hook = tf.train.CheckpointSaverHook(save_steps=100,
                                                  checkpoint_dir=train_dir,
                                                  saver=saver)

        summary_op = tf.summary.merge_all()
        summary_hook = tf.train.SummarySaverHook(save_steps=10,
                                                 output_dir=train_dir,
                                                 summary_op=summary_op)

        if hparams.warmstart:
            var_include_scopes = warmstart_include_scopes or None
            var_exclude_scopes = warmstart_exclude_scopes or None
            restore_vars = tf.contrib.framework.get_variables_to_restore(
                include=var_include_scopes, exclude=var_exclude_scopes)
            # Only restore trainable variables, we don't want to restore
            # batch-norm or optimizer-specific local variables.
            trainable_vars = set(
                tf.contrib.framework.get_trainable_variables())
            restore_vars = [
                var for var in restore_vars if var in trainable_vars
            ]

            print('Warm-start: restoring variables:\n%s\n' %
                  '\n'.join([x.name for x in restore_vars]))
            print('Warm-start: restoring from ', warmstart_checkpoint)
            assert restore_vars, 'No warm-start variables to restore!'
            restore_op, feed_dict = tf.contrib.framework.assign_from_checkpoint(
                model_path=warmstart_checkpoint,
                var_list=restore_vars,
                ignore_missing_vars=True)

            scaffold = tf.train.Scaffold(init_fn=lambda scaffold, session:
                                         session.run(restore_op, feed_dict),
                                         summary_op=summary_op,
                                         saver=saver)
        else:
            scaffold = None

        with tf.train.SingularMonitoredSession(
                hooks=[saver_hook, summary_hook],
                checkpoint_dir=train_dir,
                scaffold=scaffold) as sess:
            sess.raw_session().run(input_init)
            while not sess.should_stop():
                step, _, pred, loss = sess.run(
                    [global_step, train_op, prediction, loss_tensor])
                print(step, loss)
                sys.stdout.flush()
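The warm-start block above selects variables purely by scope name. A small, self-contained illustration of how tf.contrib.framework.get_variables_to_restore() applies include/exclude scope lists (the scope names are made up for the example):

with tf.Graph().as_default():
    with tf.variable_scope('feature_extractor'):
        tf.get_variable('w', shape=[3, 3])
    with tf.variable_scope('logits'):
        tf.get_variable('w', shape=[3, 10])
    # Restore the shared feature extractor but skip the final classifier,
    # e.g. when warm-starting against a different number of classes.
    restore_vars = tf.contrib.framework.get_variables_to_restore(
        include=['feature_extractor'], exclude=['logits'])
    print([v.op.name for v in restore_vars])  # ['feature_extractor/w']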
def train_and_evaluate(model_name=None, hparams=None, class_map_path=None, train_csv_path=None, train_clip_dir=None,
                       train_dir=None, epoch_batches=None, warmstart_checkpoint=None,
                       warmstart_include_scopes=None, warmstart_exclude_scopes=None,
                       eval_csv_path=None, eval_clip_dir=None, eval_dir=None):
    """Runs the training loop."""
    print('\nTraining model:{} with hparams:{} and class map:{}'.format(model_name, hparams, class_map_path))
    print('Training data: clip dir {} and labels {}'.format(train_clip_dir, train_csv_path))
    print('Training dir {}\n'.format(train_dir))

    with open(class_map_path) as f:
        class_map = {int(row[0]): row[1] for row in csv.reader(f)}

    with tf.Graph().as_default():
        # Create the input pipeline.
        features, labels, num_classes, input_init = inputs.train_input(
            train_csv_path=train_csv_path, train_clip_dir=train_clip_dir, class_map_path=class_map_path,
            hparams=hparams)
        # Create the model in training mode.
        global_step, prediction, loss_tensor, train_op = model.define_model(
            model_name=model_name, features=features, labels=labels, num_classes=num_classes,
            hparams=hparams, epoch_batches=epoch_batches, training=True)

        # evaluation graph
        label_class_index_table, num_classes = inputs.get_class_map(class_map_path)
        csv_record = tf.placeholder(tf.string, [])  # fed during evaluation loop.

        eval_features, eval_labels = inputs.record_to_labeled_log_mel_examples(
            csv_record, clip_dir=eval_clip_dir, hparams=hparams,
            label_class_index_table=label_class_index_table, num_classes=num_classes)

        # Create the model in prediction mode.
        global_step, eval_predictions, eval_loss_tensor, _ = model.define_model(
            model_name=model_name, features=eval_features, labels=eval_labels, num_classes=num_classes,
            hparams=hparams, training=False, evaluating=True)

        # Write evaluation graph to checkpoint directory.
        tf.train.write_graph(tf.get_default_graph().as_graph_def(add_shapes=True),
                             eval_dir, 'eval.pbtxt')

        eval_writer = tf.summary.FileWriter(eval_dir, tf.get_default_graph())

        # Define our own checkpoint saving hook, instead of using the built-in one,
        # so that we can specify additional checkpoint retention settings.
        saver = tf.train.Saver(
            max_to_keep=10, keep_checkpoint_every_n_hours=0.25)
        saver_hook = tf.train.CheckpointSaverHook(
            save_steps=100, checkpoint_dir=train_dir, saver=saver)

        summary_op = tf.summary.merge_all()
        summary_hook = tf.train.SummarySaverHook(
            save_steps=10, output_dir=train_dir, summary_op=summary_op)

        if hparams.warmstart:
            var_include_scopes = warmstart_include_scopes or None
            var_exclude_scopes = warmstart_exclude_scopes or None
            restore_vars = tf.contrib.framework.get_variables_to_restore(
                include=var_include_scopes, exclude=var_exclude_scopes)
            # Only restore trainable variables, we don't want to restore
            # batch-norm or optimizer-specific local variables.
            trainable_vars = set(tf.contrib.framework.get_trainable_variables())
            restore_vars = [var for var in restore_vars if var in trainable_vars]

            print('Warm-start: restoring variables:\n%s\n' % '\n'.join([x.name for x in restore_vars]))
            print('Warm-start: restoring from ', warmstart_checkpoint)
            assert restore_vars, 'No warm-start variables to restore!'
            restore_op, feed_dict = tf.contrib.framework.assign_from_checkpoint(
                model_path=warmstart_checkpoint, var_list=restore_vars, ignore_missing_vars=True)

            scaffold = tf.train.Scaffold(
                init_fn=lambda scaffold, session: session.run(restore_op, feed_dict),
                summary_op=summary_op, saver=saver)
        else:
            scaffold = None

        with tf.train.SingularMonitoredSession(hooks=[saver_hook, summary_hook],
                                               checkpoint_dir=train_dir,
                                               scaffold=scaffold,
                                               config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.raw_session().run(input_init)
            while not sess.should_stop():

                # train
                step, _, pred, loss = sess.run([global_step, train_op, prediction, loss_tensor])
                print(step, loss)
                sys.stdout.flush()

                # Run evaluation every 100 steps.
                if step > 0 and step % 100 == 0:
                    # Latest checkpoint state in the training directory (not used below).
                    checkpoint_state = tf.train.get_checkpoint_state(train_dir)

                    lwlrap = eval_batch(eval_csv_path, sess, eval_labels,
                                        eval_predictions, csv_record, step,
                                        eval_writer, class_map, eval_loss_tensor)
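eval_batch() is not shown in this listing. Judging only from its call site, it feeds each evaluation CSV record through the csv_record placeholder, aggregates predictions and losses, computes lwlrap, and writes summaries to eval_writer. A rough sketch under those assumptions (numpy as np and a calculate_lwlrap() helper are assumed to be available; the summary tags are made up):

def eval_batch(eval_csv_path, sess, eval_labels, eval_predictions, csv_record,
               step, eval_writer, class_map, eval_loss_tensor):
    """Hypothetical sketch: evaluate every clip listed in eval_csv_path at the given step."""
    all_labels, all_predictions, losses = [], [], []
    with open(eval_csv_path) as f:
        next(f)  # skip CSV header (assumption)
        for line in f:
            # Each record is fed through the csv_record placeholder and decoded
            # by the evaluation input graph built in train_and_evaluate().
            labels, predictions, loss = sess.run(
                [eval_labels, eval_predictions, eval_loss_tensor],
                feed_dict={csv_record: line.strip()})
            all_labels.append(labels)
            all_predictions.append(predictions)
            losses.append(loss)
    # calculate_lwlrap() (label-weighted label-ranking average precision) is
    # assumed to be defined elsewhere; class_map could drive per-class
    # summaries but is unused in this sketch.
    lwlrap = calculate_lwlrap(np.vstack(all_labels), np.vstack(all_predictions))
    summary = tf.Summary(value=[
        tf.Summary.Value(tag='eval/lwlrap', simple_value=lwlrap),
        tf.Summary.Value(tag='eval/loss', simple_value=float(np.mean(losses)))])
    eval_writer.add_summary(summary, step)
    eval_writer.flush()
    return lwlrap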
Example 5
    pipeline_config_path = 'faster_rcnn_resnet50_v1_640x640_coco17_tpu-8.config'
    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    train_config = configs['train_config']
    model_config = configs['model']
    train_input_config = configs['train_input_config']

    detection_model = model_build(model_config,
                                  True,
                                  num_classes=4,
                                  min_dim=640,
                                  max_dim=640)

    # print(train_config.add_regularization_loss)

    train_input = inputs.train_input(train_config=train_config,
                                     train_input_config=train_input_config,
                                     model_config=model_config,
                                     batch_size=2,
                                     num_classes=90,
                                     min_dim=640,
                                     max_dim=640)

    print(train_input)

    # detection_model._is_training = is_training  # pylint: disable=protected-access
    # tf.keras.backend.set_learning_phase(is_training)
    # # pdb.set_trace()

    # losses_dict, _ = _compute_losses_and_predictions_dicts(
    #     detection_model, features, labels, add_regularization_loss)
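
    # A hedged sketch of inspecting the dataset returned above, assuming this
    # customized inputs.train_input() still yields (features, labels) dicts
    # like the stock Object Detection API pipeline; the keys below follow the
    # stock API and may not match this repository's modified inputs module.
    from object_detection.core import standard_fields as fields

    for features, labels in train_input.take(1):
        images = features[fields.InputDataFields.image]
        boxes = labels[fields.InputDataFields.groundtruth_boxes]
        classes = labels[fields.InputDataFields.groundtruth_classes]
        print('image batch shape:', images.shape)        # e.g. (2, 640, 640, 3)
        print('groundtruth boxes shape:', boxes.shape)
        print('groundtruth classes shape:', classes.shape)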