def main(argv):
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  transcription_session = initialize_session(acoustic_checkpoint, hparams)

  for filename in argv[1:]:
    tf.logging.info('Starting transcription for %s...', filename)

    sequence_prediction = transcribe_audio(
        transcription_session, filename, FLAGS.frame_threshold,
        FLAGS.onset_threshold)

    midi_filename = filename + '.midi'
    midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

    tf.logging.info('Transcription written to %s.', midi_filename)

def get_default_hparams(): """Returns the default hyperparameters. Returns: A tf.HParams object representing the default hyperparameters for the model. """ return tf_utils.merge_hparams( constants.DEFAULT_HPARAMS, tf.contrib.training.HParams( activation_loss=False, batch_size=8, clip_norm=3, combined_lstm_units=128, frame_bidirectional=True, frame_lstm_units=0, learning_rate=0.0006, min_duration_ms=0, min_frame_occupancy_for_label=0.0, normalize_audio=False, onset_bidirectional=True, onset_delay=0, onset_length=32, onset_lstm_units=128, onset_mode='length_ms', sample_rate=constants.DEFAULT_SAMPLE_RATE, share_conv_features=False, spec_fmin=30.0, spec_hop_length=512, spec_log_amplitude=True, spec_n_bins=229, spec_type='mel', stop_activation_gradient=False, stop_onset_gradient=False, truncated_length=1500, # 48 seconds weight_frame_and_activation_loss=False))
def main(unused_argv):
  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  # Batch size should always be 1 for inference.
  hparams.batch_size = 1

  tf.logging.info(hparams)

  tf.gfile.MakeDirs(run_dir)

  with tf.gfile.Open(os.path.join(run_dir, 'run_config.txt'), 'w') as f:
    f.write(acoustic_checkpoint + '\n')
    f.write(FLAGS.examples_path + '\n')
    f.write(str(hparams) + '\n')

  model_inference(
      acoustic_checkpoint=acoustic_checkpoint,
      hparams=hparams,
      examples_path=FLAGS.examples_path,
      run_dir=run_dir)

def main(unused_argv):
  output_dir = os.path.expanduser(FLAGS.output_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  # Batch size should always be 1 for inference.
  hparams.batch_size = 1

  tf.logging.info(hparams)

  tf.gfile.MakeDirs(output_dir)

  summary_writer = tf.summary.FileWriter(logdir=output_dir)

  with tf.Session():
    run_config = '\n\n'.join([
        'model_dir: ' + FLAGS.model_dir,
        'checkpoint_path: ' + str(FLAGS.checkpoint_path),
        'examples_path: ' + FLAGS.examples_path,
        str(hparams),
    ])
    run_config_summary = tf.summary.text(
        'run_config',
        tf.constant(run_config, name='run_config'),
        collections=[])
    summary_writer.add_summary(run_config_summary.eval())

  if FLAGS.eval_loop:
    assert not FLAGS.checkpoint_path

    checkpoint_path = None
    while True:
      checkpoint_path = tf.contrib.training.wait_for_new_checkpoint(
          FLAGS.model_dir, last_checkpoint=checkpoint_path)
      model_inference(
          model_dir=FLAGS.model_dir,
          checkpoint_path=checkpoint_path,
          hparams=hparams,
          examples_path=FLAGS.examples_path,
          output_dir=output_dir,
          summary_writer=summary_writer,
          master=FLAGS.master,
          write_summary_every_step=False)
  else:
    model_inference(
        model_dir=FLAGS.model_dir,
        checkpoint_path=FLAGS.checkpoint_path,
        hparams=hparams,
        examples_path=FLAGS.examples_path,
        output_dir=output_dir,
        summary_writer=summary_writer,
        master=FLAGS.master)

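# For reference, the run_config logging above reduces to this standalone
# TF 1.x pattern: a text summary evaluated in a session and written once.
# The logdir and config string below are hypothetical. Passing
# collections=[] keeps the op out of the default summaries collection, as
# in the snippet above.
import tensorflow as tf

writer = tf.summary.FileWriter(logdir='/tmp/example_run')
with tf.Session():
  summary = tf.summary.text(
      'run_config', tf.constant('batch_size=1'), collections=[])
  writer.add_summary(summary.eval())
writer.close()
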
def init(self):
  tf.logging.set_verbosity(self.log)
  acoustic_checkpoint = tf.train.latest_checkpoint(
      os.path.join(os.path.expanduser(self.acoustic_run_dir), 'train'))
  default_hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  default_hparams.parse(self.hparams)
  self.transcription_session = self.initialize_session(
      acoustic_checkpoint, default_hparams)

def main(unused_argv):
  tf.logging.set_verbosity(FLAGS.log)

  tf.app.flags.mark_flags_as_required(['examples_path'])

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  # Command line flags override any of the preceding hyperparameter values.
  hparams.parse(FLAGS.hparams)

  run(hparams, run_dir)

def main(argv):
  tf.logging.set_verbosity(FLAGS.log)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  for filename in argv[1:]:
    tf.logging.info('Generating spectrogram for %s...', filename)
    spec = create_spec(filename, hparams)
    spec_filename = filename + '.json'
    with tf.gfile.Open(spec_filename, 'w') as f:
      f.write(json.dumps(spec.tolist()))
    tf.logging.info('Wrote spectrogram json to %s.', spec_filename)

def get_default_hparams(): """Returns the default hyperparameters. Returns: A tf.HParams object representing the default hyperparameters for the model. """ return tf_utils.merge_hparams( constants.DEFAULT_HPARAMS, tf.contrib.training.HParams( activation_loss=False, batch_size=8, clip_norm=3, combined_lstm_units=384, frame_bidirectional=False, frame_lstm_units=0, learning_rate=0.0006, decay_steps=10000, decay_rate=0.98, min_duration_ms=0, min_frame_occupancy_for_label=0.0, normalize_audio=False, onset_bidirectional=False, onset_delay=0, onset_length=32, onset_lstm_units=384, velocity_lstm_units=0, onset_mode='length_ms', sample_rate=constants.DEFAULT_SAMPLE_RATE, share_conv_features=False, spec_fmin=30.0, spec_hop_length=512, spec_log_amplitude=True, spec_mel_htk=True, spec_n_bins=229, spec_type='mel', stop_activation_gradient=False, stop_onset_gradient=False, truncated_length=1500, # 48 seconds weight_frame_and_activation_loss=True))
def main(unused_argv):
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.acoustic_checkpoint_filename:
    acoustic_checkpoint = os.path.join(
        os.path.expanduser(FLAGS.acoustic_run_dir), 'train',
        FLAGS.acoustic_checkpoint_filename)
  else:
    acoustic_checkpoint = tf.train.latest_checkpoint(
        os.path.join(os.path.expanduser(FLAGS.acoustic_run_dir), 'train'))

  run_dir = os.path.expanduser(FLAGS.run_dir)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  hparams.parse(FLAGS.hparams)

  tf.gfile.MakeDirs(run_dir)

  model_inference(
      acoustic_checkpoint=acoustic_checkpoint,
      hparams=hparams,
      examples_path=FLAGS.examples_path,
      run_dir=run_dir)

def main(argv):
  tf.logging.set_verbosity(FLAGS.log)

  hparams = tf_utils.merge_hparams(
      constants.DEFAULT_HPARAMS, model.get_default_hparams())
  # For this script, default to not using cudnn.
  hparams.use_cudnn = False
  hparams.parse(FLAGS.hparams)
  hparams.batch_size = 1

  with tf.Graph().as_default():
    examples = tf.placeholder(tf.string, [None])

    dataset = data.provide_batch(
        batch_size=1,
        examples=examples,
        hparams=hparams,
        is_training=False,
        truncated_length=0)

    estimator = train_util.create_estimator(
        os.path.expanduser(FLAGS.model_dir), hparams)

    iterator = dataset.make_initializable_iterator()
    next_record = iterator.get_next()

    with tf.Session() as sess:
      sess.run([
          tf.initializers.global_variables(),
          tf.initializers.local_variables()
      ])

      for filename in argv[1:]:
        tf.logging.info('Starting transcription for %s...', filename)

        # The reason we bounce between two Dataset objects is so we can use
        # the data processing functionality in data.py without having to
        # construct all the Example protos in memory ahead of time or create
        # a temporary tfrecord file.
        tf.logging.info('Processing file...')
        sess.run(iterator.initializer, {examples: [create_example(filename)]})

        def input_fn():
          return tf.data.Dataset.from_tensors(sess.run(next_record))

        tf.logging.info('Running inference...')
        checkpoint_path = None
        if FLAGS.checkpoint_path:
          checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)
        prediction_list = list(
            estimator.predict(
                input_fn,
                checkpoint_path=checkpoint_path,
                yield_single_examples=False))
        assert len(prediction_list) == 1

        sequence_prediction = transcribe_audio(
            prediction_list[0], hparams, FLAGS.frame_threshold,
            FLAGS.onset_threshold)

        midi_filename = filename + '.midi'
        midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

        tf.logging.info('Transcription written to %s.', midi_filename)

def proxy_find_library(lib):
  if lib == 'fluidsynth':
    return 'libfluidsynth.so.1'
  else:
    return orig_find_library(lib)


ctypes.util.find_library = proxy_find_library

CHECKPOINT_DIR = './train/train_50002'  ##todo
acoustic_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
print('acoustic_checkpoint=' + acoustic_checkpoint)

hparams = tf_utils.merge_hparams(
    constants.DEFAULT_HPARAMS, model.get_default_hparams())

with tf.Graph().as_default():
  examples = tf.placeholder(tf.string, [None])
  num_dims = constants.MIDI_PITCHES

  batch, iterator = data.provide_batch(
      batch_size=1,
      examples=examples,
      hparams=hparams,
      is_training=False,
      truncated_length=0)

  model.get_model(batch, hparams, is_training=False)

  session = tf.Session()

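  # Hedged continuation sketch (not in the source snippet): still inside the
  # tf.Graph().as_default() block above, the trained weights would typically
  # be restored into the session before running inference, via the standard
  # TF 1.x Saver pattern.
  saver = tf.train.Saver()
  saver.restore(session, acoustic_checkpoint)
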
DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL = 0.0
DEFAULT_JITTER_AMOUNT_MS = 0
DEFAULT_MIN_DURATION_MS = 0
DEFAULT_BACKWARD_SHIFT_AMOUNT_MS = 0
DEFAULT_BIDIRECTIONAL = True
DEFAULT_ONSET_OVERLAP = True
DEFAULT_OFFSET_LENGTH = 100

DEFAULT_AUDIO_HPARAMS = tf.contrib.training.HParams(
    sample_rate=DEFAULT_SAMPLE_RATE,
    spec_type=DEFAULT_SPEC_TYPE,
    spec_mel_htk=DEFAULT_SPEC_MEL_HTK,
    spec_log_amplitude=DEFAULT_SPEC_LOG_AMPLITUDE,
    spec_hop_length=DEFAULT_SPEC_HOP_LENGTH,
    spec_n_bins=DEFAULT_SPEC_N_BINS,
    spec_fmin=DEFAULT_SPEC_FMIN,
    cqt_bins_per_octave=DEFAULT_CQT_BINS_PER_OCTAVE,
    onset_length=DEFAULT_ONSET_LENGTH,
    offset_length=DEFAULT_OFFSET_LENGTH,
    onset_mode=DEFAULT_ONSET_MODE,
    onset_delay=DEFAULT_ONSET_DELAY,
    min_frame_occupancy_for_label=DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL,
    jitter_amount_ms=DEFAULT_JITTER_AMOUNT_MS,
    min_duration_ms=DEFAULT_MIN_DURATION_MS,
    backward_shift_amount_ms=DEFAULT_BACKWARD_SHIFT_AMOUNT_MS,
    bidirectional=DEFAULT_BIDIRECTIONAL,
    onset_overlap=DEFAULT_ONSET_OVERLAP)

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    DEFAULT_AUDIO_HPARAMS, audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS)

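# tf_utils.merge_hparams (from Magenta's common utilities) combines the two
# HParams objects above into one. An illustrative stand-in, under the
# assumption that values from the second argument win on key overlap; the
# function name is hypothetical:
def merge_hparams_sketch(hparams_1, hparams_2):
  """Hypothetical equivalent of tf_utils.merge_hparams, for illustration."""
  merged = dict(hparams_1.values())
  merged.update(hparams_2.values())
  return tf.contrib.training.HParams(**merged)
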
DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0,
        velocity_scale=80.0,
        velocity_bias=10.0,
        drum_data_map='',
        drum_prediction_map='',
        velocity_loss_weight=1.0,
        splice_n_examples=0,
        viterbi_decoding=False,
        viterbi_alpha=0.5))

DEFAULT_MIN_DURATION_MS = 0
DEFAULT_BACKWARD_SHIFT_AMOUNT_MS = 0
DEFAULT_BIDIRECTIONAL = True
DEFAULT_ONSET_OVERLAP = True
DEFAULT_OFFSET_LENGTH = 100

DEFAULT_AUDIO_HPARAMS = tf.contrib.training.HParams(
    sample_rate=DEFAULT_SAMPLE_RATE,
    spec_type=DEFAULT_SPEC_TYPE,
    spec_mel_htk=DEFAULT_SPEC_MEL_HTK,
    spec_log_amplitude=DEFAULT_SPEC_LOG_AMPLITUDE,
    spec_hop_length=DEFAULT_SPEC_HOP_LENGTH,
    spec_n_bins=DEFAULT_SPEC_N_BINS,
    spec_fmin=DEFAULT_SPEC_FMIN,
    cqt_bins_per_octave=DEFAULT_CQT_BINS_PER_OCTAVE,
    normalize_audio=DEFAULT_NORMALIZE_AUDIO,
    crop_training_sequence_to_notes=DEFAULT_CROP_TRAINING_SEQUENCE_TO_NOTES,
    onset_length=DEFAULT_ONSET_LENGTH,
    offset_length=DEFAULT_OFFSET_LENGTH,
    onset_mode=DEFAULT_ONSET_MODE,
    onset_delay=DEFAULT_ONSET_DELAY,
    min_frame_occupancy_for_label=DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL,
    jitter_amount_ms=DEFAULT_JITTER_AMOUNT_MS,
    min_duration_ms=DEFAULT_MIN_DURATION_MS,
    backward_shift_amount_ms=DEFAULT_BACKWARD_SHIFT_AMOUNT_MS,
    bidirectional=DEFAULT_BIDIRECTIONAL,
    onset_overlap=DEFAULT_ONSET_OVERLAP)

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    DEFAULT_AUDIO_HPARAMS, audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS)

import tensorflow as tf_head

Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    tf_head.contrib.training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))

CONFIG_MAP = {}
CONFIG_MAP['onsets_frames'] = Config(

import tensorflow as tf

Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    tf.contrib.training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))

CONFIG_MAP = {}
CONFIG_MAP['onsets_frames'] = Config(

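# The Config(...) call above is truncated in the source. Assuming each entry
# wires up a model_fn and an hparams object, as the namedtuple fields
# suggest, a hypothetical consumer would look a config up like this:
config = CONFIG_MAP['onsets_frames']
hparams = config.hparams
hparams.parse('onset_length=16')  # hypothetical per-experiment override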