Example 1
def initialize_session(acoustic_checkpoint, hparams):
    """Initializes a transcription session."""
    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        batch, iterator = data.provide_batch(batch_size=1,
                                             examples=examples,
                                             hparams=hparams,
                                             is_training=False,
                                             truncated_length=0)

        model.get_model(batch, hparams, is_training=False)

        session = tf.Session()
        saver = tf.train.Saver()
        saver.restore(session, acoustic_checkpoint)

        onset_probs_flat = tf.get_default_graph().get_tensor_by_name(
            'onsets/onset_probs_flat:0')
        frame_probs_flat = tf.get_default_graph().get_tensor_by_name(
            'frame_probs_flat:0')
        velocity_values_flat = tf.get_default_graph().get_tensor_by_name(
            'velocity/velocity_values_flat:0')

        return TranscriptionSession(session=session,
                                    examples=examples,
                                    iterator=iterator,
                                    onset_probs_flat=onset_probs_flat,
                                    frame_probs_flat=frame_probs_flat,
                                    velocity_values_flat=velocity_values_flat,
                                    hparams=hparams)
Example 2
def initialize_session(acoustic_checkpoint, hparams):
  """Initializes a transcription session."""
  with tf.Graph().as_default():
    examples = tf.placeholder(tf.string, [None])

    hparams.batch_size = 1

    batch, iterator = data.provide_batch(
        batch_size=1,
        examples=examples,
        hparams=hparams,
        is_training=False,
        truncated_length=0)

    model.get_model(batch, hparams, is_training=False)

    session = tf.Session()
    saver = tf.train.Saver()
    saver.restore(session, acoustic_checkpoint)

    onset_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'onsets/onset_probs_flat:0')
    frame_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'frame_probs_flat:0')
    velocity_values_flat = tf.get_default_graph().get_tensor_by_name(
        'velocity/velocity_values_flat:0')

    return TranscriptionSession(
        session=session,
        examples=examples,
        iterator=iterator,
        onset_probs_flat=onset_probs_flat,
        frame_probs_flat=frame_probs_flat,
        velocity_values_flat=velocity_values_flat,
        hparams=hparams)
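
Both variants return a TranscriptionSession that is built once and reused for every input. A minimal usage sketch, assuming TranscriptionSession is a namedtuple over the keyword arguments shown above and that serialized_example holds a serialized tf.train.Example in the format data.provide_batch expects (the checkpoint path is a placeholder):

transcription_session = initialize_session('/path/to/acoustic.ckpt', hparams)
# Feed one serialized example through the dataset iterator.
transcription_session.session.run(
    transcription_session.iterator.initializer,
    feed_dict={transcription_session.examples: [serialized_example]})
# Fetch the flattened per-frame probabilities for post-processing.
frame_probs, onset_probs = transcription_session.session.run(
    [transcription_session.frame_probs_flat,
     transcription_session.onset_probs_flat])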
Example 3
def test(checkpoint_path, test_dir, examples_path, hparams, num_batches=None):
    """Evaluate the model at a single checkpoint."""
    tf.gfile.MakeDirs(test_dir)

    _trial_summary(hparams, examples_path, test_dir)
    with tf.Graph().as_default():
        transcription_data = _get_data(examples_path,
                                       hparams,
                                       is_training=False)
        unused_loss, losses, labels, predictions, images = model.get_model(
            transcription_data, hparams, is_training=False)

        metrics_to_values, metrics_to_updates = _get_eval_metrics(
            losses, labels, predictions, images, hparams)

        metric_values = slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=test_dir,
            num_evals=num_batches or transcription_data.num_batches,
            eval_op=list(metrics_to_updates.values()),
            final_op=list(metrics_to_values.values()))

        metrics_to_values = dict(
            zip(list(metrics_to_values.keys()), metric_values))
        for metric in metrics_to_values:
            value = metrics_to_values[metric]
            if np.isscalar(value):
                print('%s: %f' % (metric, value))
Example 4
def evaluate(train_dir, eval_dir, examples_path, hparams, num_batches=None):
    """Evaluate the model repeatedly."""
    tf.gfile.MakeDirs(eval_dir)

    _trial_summary(hparams, examples_path, eval_dir)
    with tf.Graph().as_default():
        transcription_data = _get_data(examples_path,
                                       hparams,
                                       is_training=False)
        unused_loss, losses, labels, predictions, images = model.get_model(
            transcription_data, hparams, is_training=False)

        _, metrics_to_updates = _get_eval_metrics(losses, labels, predictions,
                                                  images, hparams)

        hooks = [
            tf.contrib.training.StopAfterNEvalsHook(
                num_batches or transcription_data.num_batches),
            tf.contrib.training.SummaryAtEndHook(eval_dir)
        ]
        tf.contrib.training.evaluate_repeatedly(
            train_dir,
            eval_ops=list(metrics_to_updates.values()),
            hooks=hooks,
            eval_interval_secs=60,
            timeout=None)
Example 5
def test(checkpoint_path,
         test_dir,
         examples_path,
         hparams,
         num_batches=None,
         master=''):
  """Evaluate the model at a single checkpoint."""
  tf.gfile.MakeDirs(test_dir)

  _trial_summary(hparams, examples_path, test_dir)
  with tf.Graph().as_default():
    transcription_data = _get_data(
        examples_path, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model.get_model(
        transcription_data, hparams, is_training=False)

    metrics_to_values, metrics_to_updates = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    metric_values = slim.evaluation.evaluate_once(
        checkpoint_path=checkpoint_path,
        logdir=test_dir,
        num_evals=num_batches or transcription_data.num_batches,
        eval_op=list(metrics_to_updates.values()),
        final_op=list(metrics_to_values.values()),
        master=master)

    metrics_to_values = dict(zip(list(metrics_to_values.keys()), metric_values))
    for metric in metrics_to_values:
      value = metrics_to_values[metric]
      if np.isscalar(value):
        print('%s: %f' % (metric, value))
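
A hypothetical invocation of this single-checkpoint evaluation (the paths are placeholders):

checkpoint_path = tf.train.latest_checkpoint('/tmp/onsets_frames/train')
test(checkpoint_path,
     test_dir='/tmp/onsets_frames/test',
     examples_path='/tmp/onsets_frames/test_examples.tfrecord',
     hparams=hparams,
     num_batches=None)  # None evaluates every batch in the dataset once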
Example 6
def evaluate(train_dir,
             eval_dir,
             examples_path,
             hparams,
             num_batches=None,
             master=''):
  """Evaluate the model repeatedly."""
  tf.gfile.MakeDirs(eval_dir)

  _trial_summary(hparams, examples_path, eval_dir)
  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model.get_model(
        transcription_data, hparams, is_training=False)

    _, metrics_to_updates = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    hooks = [
        tf.contrib.training.StopAfterNEvalsHook(
            num_batches or transcription_data.num_batches),
        tf.contrib.training.SummaryAtEndHook(eval_dir)]
    tf.contrib.training.evaluate_repeatedly(
        train_dir,
        eval_ops=list(metrics_to_updates.values()),
        hooks=hooks,
        eval_interval_secs=60,
        timeout=None,
        master=master)
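
Unlike test above, evaluate polls train_dir for new checkpoints every eval_interval_secs seconds and, with timeout=None, never returns on its own, so it is meant to run as a separate process alongside training. A sketch with placeholder paths:

evaluate(train_dir='/tmp/onsets_frames/train',
         eval_dir='/tmp/onsets_frames/eval',
         examples_path='/tmp/onsets_frames/eval_examples.tfrecord',
         hparams=hparams)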
Example 7
def train(train_dir,
          examples_path,
          hparams,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None):
    """Train loop."""
    tf.gfile.MakeDirs(train_dir)

    _trial_summary(hparams, examples_path, train_dir)
    with tf.Graph().as_default():
        transcription_data = _get_data(examples_path,
                                       hparams,
                                       is_training=True)

        loss, losses, unused_labels, unused_predictions, images = model.get_model(
            transcription_data, hparams, is_training=True)

        tf.summary.scalar('loss', loss)
        for label, loss_collection in losses.items():
            loss_label = 'losses/' + label
            tf.summary.scalar(loss_label, tf.reduce_mean(loss_collection))
        for name, image in images.items():
            tf.summary.image(name, image)

        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(hparams.learning_rate,
                                                   global_step,
                                                   hparams.decay_steps,
                                                   hparams.decay_rate,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        train_op = slim.learning.create_train_op(
            loss,
            optimizer,
            clip_gradient_norm=hparams.clip_norm,
            summarize_gradients=True)

        logging_dict = {
            'global_step': tf.train.get_global_step(),
            'loss': loss
        }

        hooks = [tf.train.LoggingTensorHook(logging_dict, every_n_iter=100)]
        if num_steps:
            hooks.append(tf.train.StopAtStepHook(num_steps))

        scaffold = tf.train.Scaffold(saver=tf.train.Saver(
            max_to_keep=checkpoints_to_keep,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))

        tf.contrib.training.train(train_op=train_op,
                                  logdir=train_dir,
                                  scaffold=scaffold,
                                  hooks=hooks,
                                  save_checkpoint_secs=300)
Example 8
def model_inference(acoustic_checkpoint, hparams, examples_path, run_dir):
  """Runs inference for the given examples."""
  tf.logging.info('acoustic_checkpoint=%s', acoustic_checkpoint)
  tf.logging.info('examples_path=%s', examples_path)
  tf.logging.info('run_dir=%s', run_dir)

  with tf.Graph().as_default():
    num_dims = constants.MIDI_PITCHES

    # Build the acoustic model within an 'acoustic' scope to isolate its
    # variables from the other models.
    with tf.variable_scope('acoustic'):
      truncated_length = 0
      if FLAGS.max_seconds_per_sequence:
        truncated_length = int(
            math.ceil((FLAGS.max_seconds_per_sequence *
                       data.hparams_frames_per_second(hparams))))
      acoustic_data_provider, _ = data.provide_batch(
          batch_size=1,
          examples=examples_path,
          hparams=hparams,
          is_training=False,
          truncated_length=truncated_length,
          include_note_sequences=True)

      _, _, data_labels, _, _ = model.get_model(
          acoustic_data_provider, hparams, is_training=False)

    # The checkpoints won't have the new scopes.
    acoustic_variables = {
        re.sub(r'^acoustic/', '', var.op.name): var
        for var in slim.get_variables(scope='acoustic/')
    }
    acoustic_restore = tf.train.Saver(acoustic_variables)

    onset_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'acoustic/onsets/onset_probs_flat:0')
    frame_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'acoustic/frame_probs_flat:0')
    offset_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'acoustic/offsets/offset_probs_flat:0')
    velocity_values_flat = tf.get_default_graph().get_tensor_by_name(
        'acoustic/velocity/velocity_values_flat:0')

    # Define some metrics.
    (metrics_to_updates, metric_note_precision, metric_note_recall,
     metric_note_f1, metric_note_precision_with_offsets,
     metric_note_recall_with_offsets, metric_note_f1_with_offsets,
     metric_note_precision_with_offsets_velocity,
     metric_note_recall_with_offsets_velocity,
     metric_note_f1_with_offsets_velocity, metric_frame_labels,
     metric_frame_predictions) = infer_util.define_metrics(num_dims)

    summary_op = tf.summary.merge_all()
    global_step = tf.contrib.framework.get_or_create_global_step()
    global_step_increment = global_step.assign_add(1)

    # Use a custom init function to restore the acoustic and language models
    # from their separate checkpoints.
    def init_fn(unused_self, sess):
      acoustic_restore.restore(sess, acoustic_checkpoint)

    scaffold = tf.train.Scaffold(init_fn=init_fn)
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=scaffold, master=FLAGS.master)
    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
      tf.logging.info('running session')
      summary_writer = tf.summary.FileWriter(
          logdir=run_dir, graph=sess.graph)

      tf.logging.info('Inferring for %d batches',
                      acoustic_data_provider.num_batches)
      infer_times = []
      num_frames = []
      for unused_i in range(acoustic_data_provider.num_batches):
        start_time = time.time()
        (labels, filenames, note_sequences, frame_probs, onset_probs,
         offset_probs, velocity_values) = sess.run([
             data_labels,
             acoustic_data_provider.filenames,
             acoustic_data_provider.note_sequences,
             frame_probs_flat,
             onset_probs_flat,
             offset_probs_flat,
             velocity_values_flat,
         ])
        # We expect these all to be length 1 because batch size is 1.
        assert len(filenames) == len(note_sequences) == 1
        # These should be the same length and have been flattened.
        assert len(labels) == len(frame_probs) == len(onset_probs)

        frame_predictions = frame_probs > FLAGS.frame_threshold
        if FLAGS.require_onset:
          onset_predictions = onset_probs > FLAGS.onset_threshold
        else:
          onset_predictions = None

        if FLAGS.use_offset:
          offset_predictions = offset_probs > FLAGS.offset_threshold
        else:
          offset_predictions = None

        sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
            frame_predictions,
            frames_per_second=data.hparams_frames_per_second(hparams),
            min_duration_ms=0,
            min_midi_pitch=constants.MIN_MIDI_PITCH,
            onset_predictions=onset_predictions,
            offset_predictions=offset_predictions,
            velocity_values=velocity_values)

        end_time = time.time()
        infer_time = end_time - start_time
        infer_times.append(infer_time)
        num_frames.append(frame_probs.shape[0])
        tf.logging.info(
            'Infer time %f, frames %d, frames/sec %f, running average %f',
            infer_time, frame_probs.shape[0], frame_probs.shape[0] / infer_time,
            np.sum(num_frames) / np.sum(infer_times))

        tf.logging.info('Scoring sequence %s', filenames[0])

        def shift_notesequence(ns_time):
          return ns_time + hparams.backward_shift_amount_ms / 1000.

        sequence_label = infer_util.score_sequence(
            sess,
            global_step_increment,
            summary_op,
            summary_writer,
            metrics_to_updates,
            metric_note_precision,
            metric_note_recall,
            metric_note_f1,
            metric_note_precision_with_offsets,
            metric_note_recall_with_offsets,
            metric_note_f1_with_offsets,
            metric_note_precision_with_offsets_velocity,
            metric_note_recall_with_offsets_velocity,
            metric_note_f1_with_offsets_velocity,
            metric_frame_labels,
            metric_frame_predictions,
            frame_labels=labels,
            sequence_prediction=sequence_prediction,
            frames_per_second=data.hparams_frames_per_second(hparams),
            sequence_label=sequences_lib.adjust_notesequence_times(
                music_pb2.NoteSequence.FromString(note_sequences[0]),
                shift_notesequence)[0],
            sequence_id=filenames[0])

        # Make filenames UNIX-friendly.
        filename = filenames[0].decode('utf-8').replace('/', '_').replace(
            ':', '.')
        output_file = os.path.join(run_dir, filename + '.mid')
        tf.logging.info('Writing inferred midi file to %s', output_file)
        midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

        label_output_file = os.path.join(run_dir, filename + '_label.mid')
        tf.logging.info('Writing label midi file to %s', label_output_file)
        midi_io.sequence_proto_to_midi_file(sequence_label, label_output_file)

        # Also write a pianoroll showing acoustic model output vs labels.
        pianoroll_output_file = os.path.join(run_dir,
                                             filename + '_pianoroll.png')
        tf.logging.info('Writing acoustic logit/label file to %s',
                        pianoroll_output_file)
        with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
          scipy.misc.imsave(
              f,
              infer_util.posterior_pianoroll_image(
                  frame_probs,
                  sequence_prediction,
                  labels,
                  overlap=True,
                  frames_per_second=data.hparams_frames_per_second(hparams)))

        summary_writer.flush()
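
A hypothetical call; model_inference also reads FLAGS (master, max_seconds_per_sequence, frame_threshold, require_onset, use_offset, and the other thresholds), so those flags must be defined and parsed before calling:

acoustic_checkpoint = tf.train.latest_checkpoint('/tmp/onsets_frames/train')
model_inference(acoustic_checkpoint,
                hparams=hparams,
                examples_path='/tmp/onsets_frames/test_examples.tfrecord',
                run_dir='/tmp/onsets_frames/infer')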
Example 9
print('acoustic_checkpoint=' + acoustic_checkpoint)
hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                 model.get_default_hparams())

with tf.Graph().as_default():
    examples = tf.placeholder(tf.string, [None])

    num_dims = constants.MIDI_PITCHES

    batch, iterator = data.provide_batch(batch_size=1,
                                         examples=examples,
                                         hparams=hparams,
                                         is_training=False,
                                         truncated_length=0)

    model.get_model(batch, hparams, is_training=False)

    session = tf.Session()
    saver = tf.train.Saver()
    saver.restore(session, acoustic_checkpoint)

    onset_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'onsets/onset_probs_flat:0')
    frame_probs_flat = tf.get_default_graph().get_tensor_by_name(
        'frame_probs_flat:0')

# TODO: transcribe a local wav file.
# filename = './data/wav_format/xinjing.wav'
# to_process = []

# wav_data = audio_io.samples_to_wav_data(
#     librosa.util.normalize(
#         librosa.core.load(filename, sr=hparams.sample_rate)[0]),
#     hparams.sample_rate)
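
The commented block breaks off before wav_data is used. A hedged sketch of how it would continue, following the feeding pattern from Examples 1 and 2; the feature names ('id', 'sequence', 'audio') and the music_pb2 import are assumptions about the expected input format, not part of the original script:

# Assumes: from magenta.protobuf import music_pb2, and that the
# commented-out lines above have been enabled so wav_data exists.
example = tf.train.Example(features=tf.train.Features(feature={
    'id': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b'xinjing'])),
    'sequence': tf.train.Feature(
        bytes_list=tf.train.BytesList(
            value=[music_pb2.NoteSequence().SerializeToString()])),
    'audio': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[wav_data])),
}))

# Initialize the dataset iterator with the single example, then fetch
# the flattened frame and onset probabilities.
session.run(iterator.initializer,
            feed_dict={examples: [example.SerializeToString()]})
frame_probs, onset_probs = session.run(
    [frame_probs_flat, onset_probs_flat])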
Example 10
def train(train_dir,
          examples_path,
          hparams,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          task=0,
          num_ps_tasks=0):
  """Train loop."""
  tf.gfile.MakeDirs(train_dir)
  is_chief = task == 0

  if is_chief:
    _trial_summary(hparams, examples_path, train_dir)

  with tf.Graph().as_default():
    with tf.device(
        tf.train.replica_device_setter(num_ps_tasks, merge_devices=True)):
      transcription_data = _get_data(examples_path, hparams, is_training=True)

      loss, losses, unused_labels, unused_predictions, images = model.get_model(
          transcription_data, hparams, is_training=True)

      tf.summary.scalar('loss', loss)
      for label, loss_collection in losses.items():
        loss_label = 'losses/' + label
        tf.summary.scalar(loss_label, tf.reduce_mean(loss_collection))
      for name, image in images.items():
        tf.summary.image(name, image)

      global_step = tf.train.get_or_create_global_step()
      learning_rate = tf.train.exponential_decay(
          hparams.learning_rate,
          global_step,
          hparams.decay_steps,
          hparams.decay_rate,
          staircase=True)
      tf.summary.scalar('learning_rate', learning_rate)
      frame_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

      frame_train_op = slim.learning.create_train_op(
          loss,
          frame_optimizer,
          clip_gradient_norm=hparams.clip_norm,
          summarize_gradients=True,
          variables_to_train=None)

      logging_dict = {'global_step': tf.train.get_global_step(), 'loss': loss}
      if hasattr(hparams, 'sampling_probability'):
        logging_dict['sampling_probability'] = hparams.sampling_probability

      frame_hooks = [tf.train.LoggingTensorHook(logging_dict, every_n_iter=100)]
      if num_steps:
        frame_hooks.append(tf.train.StopAtStepHook(num_steps))

      scaffold = tf.train.Scaffold(
          saver=tf.train.Saver(
              max_to_keep=checkpoints_to_keep,
              keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))

      tf.contrib.training.train(
          train_op=frame_train_op,
          logdir=train_dir,
          scaffold=scaffold,
          hooks=frame_hooks,
          save_checkpoint_secs=300,
          master=master,
          is_chief=is_chief)
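
A sketch of a single-machine invocation; the defaults master='', task=0, num_ps_tasks=0 already select non-distributed training, and the paths are placeholders:

train(train_dir='/tmp/onsets_frames/train',
      examples_path='/tmp/onsets_frames/train_examples.tfrecord',
      hparams=hparams,
      num_steps=50000)  # StopAtStepHook ends training after 50k steps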