def main(argv):
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  transcription_session = initialize_session(acoustic_checkpoint, hparams)

  for filename in argv[1:]:
    tf.logging.info('Starting transcription for %s...', filename)

    sequence_prediction = transcribe_audio(
        transcription_session, filename, FLAGS.frame_threshold,
        FLAGS.onset_threshold)

    midi_filename = filename + '.midi'
    midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

    tf.logging.info('Transcription written to %s.', midi_filename)
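Every example on this page funnels its defaults through tf_utils.merge_hparams, which is not itself shown. A minimal sketch of its assumed semantics (values from the second HParams object win on conflicting keys):

import tensorflow as tf

def merge_hparams(hparams_1, hparams_2):
    # Sketch only, not the verbatim Magenta implementation: take the first
    # object's name -> value dict, let the second object's values override,
    # and wrap the merged dict in a fresh HParams.
    values = hparams_1.values()
    values.update(hparams_2.values())
    return tf.contrib.training.HParams(**values)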
Example #2
def get_default_hparams():
    """Returns the default hyperparameters.

  Returns:
    A tf.HParams object representing the default hyperparameters for the model.
  """
    return tf_utils.merge_hparams(
        constants.DEFAULT_HPARAMS,
        tf.contrib.training.HParams(
            activation_loss=False,
            batch_size=8,
            clip_norm=3,
            combined_lstm_units=128,
            frame_bidirectional=True,
            frame_lstm_units=0,
            learning_rate=0.0006,
            min_duration_ms=0,
            min_frame_occupancy_for_label=0.0,
            normalize_audio=False,
            onset_bidirectional=True,
            onset_delay=0,
            onset_length=32,
            onset_lstm_units=128,
            onset_mode='length_ms',
            sample_rate=constants.DEFAULT_SAMPLE_RATE,
            share_conv_features=False,
            spec_fmin=30.0,
            spec_hop_length=512,
            spec_log_amplitude=True,
            spec_n_bins=229,
            spec_type='mel',
            stop_activation_gradient=False,
            stop_onset_gradient=False,
            truncated_length=1500,  # 48 seconds
            weight_frame_and_activation_loss=False))
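The string handed to hparams.parse (FLAGS.hparams in the surrounding examples) is a comma-separated list of name=value assignments. A hypothetical override on top of these defaults:

hparams = get_default_hparams()
hparams.parse('batch_size=1,onset_lstm_units=256')
assert hparams.batch_size == 1
assert hparams.onset_lstm_units == 256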
Example #3
def main(argv):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.acoustic_checkpoint_filename:
        acoustic_checkpoint = os.path.join(
            os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
            FLAGS.acoustic_checkpoint_filename)
    else:
        acoustic_checkpoint = tf.train.latest_checkpoint(
            os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    hparams.parse(FLAGS.hparams)

    transcription_session = initialize_session(acoustic_checkpoint, hparams)

    for filename in argv[1:]:
        tf.logging.info('Starting transcription for %s...', filename)

        sequence_prediction = transcribe_audio(transcription_session, filename,
                                               FLAGS.frame_threshold,
                                               FLAGS.onset_threshold)

        midi_filename = filename + '.midi'
        midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

        tf.logging.info('Transcription written to %s.', midi_filename)
Example #4
def main(unused_argv):
  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  # Batch size should always be 1 for inference.
  hparams.batch_size = 1

  tf.logging.info(hparams)

  tf.gfile.MakeDirs(run_dir)

  with tf.gfile.Open(os.path.join(run_dir, 'run_config.txt'), 'w') as f:
    f.write(acoustic_checkpoint + '\n')
    f.write(FLAGS.examples_path + '\n')
    f.write(str(hparams) + '\n')

  model_inference(
      acoustic_checkpoint=acoustic_checkpoint,
      hparams=hparams,
      examples_path=FLAGS.examples_path,
      run_dir=run_dir)
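The FLAGS used in this example are defined elsewhere in the module. A plausible definition with tf.app.flags (flag names are taken from the snippet; defaults and help strings are assumptions):

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('acoustic_run_dir', None,
                           'Run directory of the trained acoustic model.')
tf.app.flags.DEFINE_string('acoustic_checkpoint_filename', None,
                           'Explicit checkpoint filename; latest is used if unset.')
tf.app.flags.DEFINE_string('run_dir', None, 'Directory for inference output.')
tf.app.flags.DEFINE_string('examples_path', None, 'Path to the input examples.')
tf.app.flags.DEFINE_string('hparams', '',
                           'Comma-separated hyperparameter overrides.')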
Example #5
def main(unused_argv):
  output_dir = os.path.expanduser(FLAGS.output_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  # Batch size should always be 1 for inference.
  hparams.batch_size = 1

  tf.logging.info(hparams)

  tf.gfile.MakeDirs(output_dir)

  summary_writer = tf.summary.FileWriter(logdir=output_dir)

  with tf.Session():
    run_config = '\n\n'.join([
        'model_dir: ' + FLAGS.model_dir,
        'checkpoint_path: ' + str(FLAGS.checkpoint_path),
        'examples_path: ' + FLAGS.examples_path,
        str(hparams),
    ])
    run_config_summary = tf.summary.text(
        'run_config',
        tf.constant(run_config, name='run_config'),
        collections=[])
    summary_writer.add_summary(run_config_summary.eval())

  if FLAGS.eval_loop:
    assert not FLAGS.checkpoint_path

    checkpoint_path = None
    while True:
      checkpoint_path = tf.contrib.training.wait_for_new_checkpoint(
          FLAGS.model_dir, last_checkpoint=checkpoint_path)
      model_inference(
          model_dir=FLAGS.model_dir,
          checkpoint_path=checkpoint_path,
          hparams=hparams,
          examples_path=FLAGS.examples_path,
          output_dir=output_dir,
          summary_writer=summary_writer,
          master=FLAGS.master,
          write_summary_every_step=False)
  else:
    model_inference(
        model_dir=FLAGS.model_dir,
        checkpoint_path=FLAGS.checkpoint_path,
        hparams=hparams,
        examples_path=FLAGS.examples_path,
        output_dir=output_dir,
        summary_writer=summary_writer,
        master=FLAGS.master)
Example #6
    def init(self):
        tf.logging.set_verbosity(self.log)

        acoustic_checkpoint = tf.train.latest_checkpoint(
            os.path.join(os.path.expanduser(self.acoustic_run_dir), 'train'))

        default_hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                                 model.get_default_hparams())
        default_hparams.parse(self.hparams)

        self.transcription_session = self.initialize_session(
            acoustic_checkpoint, default_hparams)
Example #7
def main(unused_argv):
    output_dir = os.path.expanduser(FLAGS.output_dir)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    hparams.parse(FLAGS.hparams)

    # Batch size should always be 1 for inference.
    hparams.batch_size = 1

    tf.logging.info(hparams)

    tf.gfile.MakeDirs(output_dir)

    summary_writer = tf.summary.FileWriter(logdir=output_dir)

    with tf.Session():
        run_config = '\n\n'.join([
            'model_dir: ' + FLAGS.model_dir,
            'checkpoint_path: ' + str(FLAGS.checkpoint_path),
            'examples_path: ' + FLAGS.examples_path,
            str(hparams),
        ])
        run_config_summary = tf.summary.text('run_config',
                                             tf.constant(run_config,
                                                         name='run_config'),
                                             collections=[])
        summary_writer.add_summary(run_config_summary.eval())

    if FLAGS.eval_loop:
        assert not FLAGS.checkpoint_path

        checkpoint_path = None
        while True:
            checkpoint_path = tf.contrib.training.wait_for_new_checkpoint(
                FLAGS.model_dir, last_checkpoint=checkpoint_path)
            model_inference(model_dir=FLAGS.model_dir,
                            checkpoint_path=checkpoint_path,
                            hparams=hparams,
                            examples_path=FLAGS.examples_path,
                            output_dir=output_dir,
                            summary_writer=summary_writer,
                            master=FLAGS.master,
                            write_summary_every_step=False)
    else:
        model_inference(model_dir=FLAGS.model_dir,
                        checkpoint_path=FLAGS.checkpoint_path,
                        hparams=hparams,
                        examples_path=FLAGS.examples_path,
                        output_dir=output_dir,
                        summary_writer=summary_writer,
                        master=FLAGS.master)
Example #8
def main(unused_argv):
  tf.logging.set_verbosity(FLAGS.log)
  tf.app.flags.mark_flags_as_required(['examples_path'])

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                   model.get_default_hparams())

  # Command line flags override any of the preceding hyperparameter values.
  hparams.parse(FLAGS.hparams)

  run(hparams, run_dir)
Example #9
def main(unused_argv):
    tf.logging.set_verbosity(FLAGS.log)
    tf.app.flags.mark_flags_as_required(['examples_path'])

    run_dir = os.path.expanduser(FLAGS.run_dir)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())

    # Command line flags override any of the preceding hyperparameter values.
    hparams.parse(FLAGS.hparams)

    run(hparams, run_dir)
Example #10
def main(argv):
    tf.logging.set_verbosity(FLAGS.log)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    hparams.parse(FLAGS.hparams)

    for filename in argv[1:]:
        tf.logging.info('Generating spectrogram for %s...', filename)

        spec = create_spec(filename, hparams)
        spec_filename = filename + '.json'
        with tf.gfile.Open(spec_filename, 'w') as f:
            f.write(json.dumps(spec.tolist()))
            tf.logging.info('Wrote spectrogram json to %s.', spec_filename)
Example #11
def main(argv):
  tf.logging.set_verbosity(FLAGS.log)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  for filename in argv[1:]:
    tf.logging.info('Generating spectrogram for %s...', filename)

    spec = create_spec(filename, hparams)
    spec_filename = filename + '.json'
    with tf.gfile.Open(spec_filename, 'w') as f:
      f.write(json.dumps(spec.tolist()))
      tf.logging.info('Wrote spectrogram json to %s.', spec_filename)
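The JSON written above is just the spectrogram as a nested list. A hypothetical round-trip back into a NumPy array for inspection:

import json
import numpy as np

with open(spec_filename) as f:
    spec = np.array(json.load(f))
print(spec.shape)  # (num_frames, spec_n_bins); 229 bins with the mel defaults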
Example #12
def get_default_hparams():
  """Returns the default hyperparameters.

  Returns:
    A tf.contrib.training.HParams object representing the default
    hyperparameters for the model.
  """
  return tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS,
      tf.contrib.training.HParams(
          activation_loss=False,
          batch_size=8,
          clip_norm=3,
          combined_lstm_units=384,
          frame_bidirectional=False,
          frame_lstm_units=0,
          learning_rate=0.0006,
          decay_steps=10000,
          decay_rate=0.98,
          min_duration_ms=0,
          min_frame_occupancy_for_label=0.0,
          normalize_audio=False,
          onset_bidirectional=False,
          onset_delay=0,
          onset_length=32,
          onset_lstm_units=384,
          velocity_lstm_units=0,
          onset_mode='length_ms',
          sample_rate=constants.DEFAULT_SAMPLE_RATE,
          share_conv_features=False,
          spec_fmin=30.0,
          spec_hop_length=512,
          spec_log_amplitude=True,
          spec_mel_htk=True,
          spec_n_bins=229,
          spec_type='mel',
          stop_activation_gradient=False,
          stop_onset_gradient=False,
          truncated_length=1500,  # 48 seconds
          weight_frame_and_activation_loss=True))
Example #13
def main(unused_argv):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.acoustic_checkpoint_filename:
        acoustic_checkpoint = os.path.join(
            os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
            FLAGS.acoustic_checkpoint_filename)
    else:
        acoustic_checkpoint = tf.train.latest_checkpoint(
            os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

    run_dir = os.path.expanduser(FLAGS.run_dir)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    hparams.parse(FLAGS.hparams)

    tf.gfile.MakeDirs(run_dir)

    model_inference(acoustic_checkpoint=acoustic_checkpoint,
                    hparams=hparams,
                    examples_path=FLAGS.examples_path,
                    run_dir=run_dir)
Example #14
def main(unused_argv):
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  tf.gfile.MakeDirs(run_dir)

  model_inference(
      acoustic_checkpoint=acoustic_checkpoint,
      hparams=hparams,
      examples_path=FLAGS.examples_path,
      run_dir=run_dir)
Example #15
def main(argv):
    tf.logging.set_verbosity(FLAGS.log)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    # For this script, default to not using cudnn.
    hparams.use_cudnn = False
    hparams.parse(FLAGS.hparams)
    hparams.batch_size = 1

    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        dataset = data.provide_batch(batch_size=1,
                                     examples=examples,
                                     hparams=hparams,
                                     is_training=False,
                                     truncated_length=0)

        estimator = train_util.create_estimator(
            os.path.expanduser(FLAGS.model_dir), hparams)

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            for filename in argv[1:]:
                tf.logging.info('Starting transcription for %s...', filename)

                # The reason we bounce between two Dataset objects is so we can use
                # the data processing functionality in data.py without having to
                # construct all the Example protos in memory ahead of time or create
                # a temporary tfrecord file.
                tf.logging.info('Processing file...')
                sess.run(iterator.initializer,
                         {examples: [create_example(filename)]})

                def input_fn():
                    return tf.data.Dataset.from_tensors(sess.run(next_record))

                tf.logging.info('Running inference...')
                checkpoint_path = None
                if FLAGS.checkpoint_path:
                    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)
                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                sequence_prediction = transcribe_audio(prediction_list[0],
                                                       hparams,
                                                       FLAGS.frame_threshold,
                                                       FLAGS.onset_threshold)

                midi_filename = filename + '.midi'
                midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                    midi_filename)

                tf.logging.info('Transcription written to %s.', midi_filename)
Example #16

def proxy_find_library(lib):
    # Force ctypes lookups for 'fluidsynth' to the versioned soname (the
    # unversioned name can fail to resolve); everything else falls through
    # to the original find_library saved in orig_find_library.
    if lib == 'fluidsynth':
        return 'libfluidsynth.so.1'
    else:
        return orig_find_library(lib)


ctypes.util.find_library = proxy_find_library

CHECKPOINT_DIR = './train/train_50002'  # TODO

acoustic_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
print('acoustic_checkpoint=' + acoustic_checkpoint)
hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                 model.get_default_hparams())

with tf.Graph().as_default():
    examples = tf.placeholder(tf.string, [None])

    num_dims = constants.MIDI_PITCHES

    batch, iterator = data.provide_batch(batch_size=1,
                                         examples=examples,
                                         hparams=hparams,
                                         is_training=False,
                                         truncated_length=0)

    model.get_model(batch, hparams, is_training=False)

    session = tf.Session()
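    # The snippet is cut off after creating the session. The assumed next step
    # (not shown in the source) is restoring the checkpoint into the graph:
    saver = tf.train.Saver()
    saver.restore(session, acoustic_checkpoint)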
Example #17
DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL = 0.0
DEFAULT_JITTER_AMOUNT_MS = 0
DEFAULT_MIN_DURATION_MS = 0
DEFAULT_BACKWARD_SHIFT_AMOUNT_MS = 0
DEFAULT_BIDIRECTIONAL = True
DEFAULT_ONSET_OVERLAP = True
DEFAULT_OFFSET_LENGTH = 100

DEFAULT_AUDIO_HPARAMS = tf.contrib.training.HParams(
    sample_rate=DEFAULT_SAMPLE_RATE,
    spec_type=DEFAULT_SPEC_TYPE,
    spec_mel_htk=DEFAULT_SPEC_MEL_HTK,
    spec_log_amplitude=DEFAULT_SPEC_LOG_AMPLITUDE,
    spec_hop_length=DEFAULT_SPEC_HOP_LENGTH,
    spec_n_bins=DEFAULT_SPEC_N_BINS,
    spec_fmin=DEFAULT_SPEC_FMIN,
    cqt_bins_per_octave=DEFAULT_CQT_BINS_PER_OCTAVE,
    onset_length=DEFAULT_ONSET_LENGTH,
    offset_length=DEFAULT_OFFSET_LENGTH,
    onset_mode=DEFAULT_ONSET_MODE,
    onset_delay=DEFAULT_ONSET_DELAY,
    min_frame_occupancy_for_label=DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL,
    jitter_amount_ms=DEFAULT_JITTER_AMOUNT_MS,
    min_duration_ms=DEFAULT_MIN_DURATION_MS,
    backward_shift_amount_ms=DEFAULT_BACKWARD_SHIFT_AMOUNT_MS,
    bidirectional=DEFAULT_BIDIRECTIONAL,
    onset_overlap=DEFAULT_ONSET_OVERLAP)

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    DEFAULT_AUDIO_HPARAMS, audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS)
Example #18
DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0,
        velocity_scale=80.0,
        velocity_bias=10.0,
        drum_data_map='',
        drum_prediction_map='',
        velocity_loss_weight=1.0,
        splice_n_examples=0,
        viterbi_decoding=False,
        viterbi_alpha=0.5))
Example #19
DEFAULT_MIN_DURATION_MS = 0
DEFAULT_BACKWARD_SHIFT_AMOUNT_MS = 0
DEFAULT_BIDIRECTIONAL = True
DEFAULT_ONSET_OVERLAP = True
DEFAULT_OFFSET_LENGTH = 100

DEFAULT_AUDIO_HPARAMS = tf.contrib.training.HParams(
    sample_rate=DEFAULT_SAMPLE_RATE,
    spec_type=DEFAULT_SPEC_TYPE,
    spec_mel_htk=DEFAULT_SPEC_MEL_HTK,
    spec_log_amplitude=DEFAULT_SPEC_LOG_AMPLITUDE,
    spec_hop_length=DEFAULT_SPEC_HOP_LENGTH,
    spec_n_bins=DEFAULT_SPEC_N_BINS,
    spec_fmin=DEFAULT_SPEC_FMIN,
    cqt_bins_per_octave=DEFAULT_CQT_BINS_PER_OCTAVE,
    normalize_audio=DEFAULT_NORMALIZE_AUDIO,
    crop_training_sequence_to_notes=DEFAULT_CROP_TRAINING_SEQUENCE_TO_NOTES,
    onset_length=DEFAULT_ONSET_LENGTH,
    offset_length=DEFAULT_OFFSET_LENGTH,
    onset_mode=DEFAULT_ONSET_MODE,
    onset_delay=DEFAULT_ONSET_DELAY,
    min_frame_occupancy_for_label=DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL,
    jitter_amount_ms=DEFAULT_JITTER_AMOUNT_MS,
    min_duration_ms=DEFAULT_MIN_DURATION_MS,
    backward_shift_amount_ms=DEFAULT_BACKWARD_SHIFT_AMOUNT_MS,
    bidirectional=DEFAULT_BIDIRECTIONAL,
    onset_overlap=DEFAULT_ONSET_OVERLAP)

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    DEFAULT_AUDIO_HPARAMS, audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS)
Example #20
import collections

import tensorflow as tf_head

Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    tf_head.contrib.training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))

CONFIG_MAP = {}

CONFIG_MAP['onsets_frames'] = Config(
    # The source is truncated here; the arguments below are an assumed
    # completion pairing the model function with the defaults above.
    model_fn=model.model_fn,
    hparams=DEFAULT_HPARAMS)
Example #21
import collections

import tensorflow as tf

Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    tf.contrib.training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))

CONFIG_MAP = {}

CONFIG_MAP['onsets_frames'] = Config(
    # The source is truncated here; the arguments below are an assumed
    # completion pairing the model function with the defaults above.
    model_fn=model.model_fn,
    hparams=DEFAULT_HPARAMS)
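Hypothetical usage of the completed map: a config is looked up by name, and its hparams can still be overridden before building the model.

config = CONFIG_MAP['onsets_frames']
hparams = config.hparams
hparams.parse('spec_hop_length=256')  # example override; the value is arbitrary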