def construct(args):
    """Build a melody network plus its train/validation/test datasets.

    All dataset pipelines and the model are created inside the network's
    TF graph; returns (network, train_dataset, validation_datasets,
    test_datasets).
    """
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spec_fn, spec_thumb, spec_info = common.spectrograms(args)
        # Record the spectrogram variant among the hyperparameters.
        args.spectrogram_thumb = spec_thumb

        # Annotation hop in samples, rescaled from the 44.1 kHz reference.
        hop = args.frame_width * args.samplerate / 44100
        print("hop_samples", hop)

        def preload_fn(aa):
            # Materialize the annotation and the (precomputed) spectrogram.
            aa.annotation = datasets.Annotation.from_time_series(
                *aa.annotation, hop_samples=hop)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spec_fn, spec_thumb, spec_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        validation_hooks = [
            MetricsHook(),
            VisualOutputHook(False, False, True, True),
            SaveBestModelHook(args.logdir),
            CSVOutputWriterHook(),
            AdjustVoicingHook(),
        ]

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train, valid_hooks=validation_hooks)

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spec_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    """Build a melody network plus its datasets (minimal variant).

    Same structure as the other ``construct`` helpers, but without
    hyperparameter bookkeeping or custom hooks.  Returns (network,
    train_dataset, validation_datasets, test_datasets).
    """
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(
            args)

        def preload_fn(aa):
            # Consistency fix: pass the hop by the ``hop_samples`` keyword,
            # as the sibling construct() variants do, instead of relying on
            # its positional slot.  The value (frame width rescaled from the
            # 44.1 kHz reference rate) is unchanged.
            aa.annotation = datasets.Annotation.from_time_series(
                *aa.annotation,
                hop_samples=args.frame_width * args.samplerate / 44100)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train)

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    """Build a melody network plus its datasets (MedleyDB melody-4 variant).

    MedleyDB annotations get a ``_mel4`` uid suffix, and voicing-threshold
    adjustment hooks are attached to the non-small validation datasets
    unless voicing estimation is enabled.
    """
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spec_fn, spec_thumb, spec_info = common.spectrograms(args)
        # Record the spectrogram variant among the hyperparameters.
        args.spectrogram_thumb = spec_thumb

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            # MedleyDB entries are cached under a melody-4 specific uid.
            if uid.startswith("mdb_"):
                uid = uid + "_mel4"
            aa.annotation = datasets.Annotation.from_time_series(
                annot_path, uid,
                hop_samples=args.frame_width * args.samplerate / 44100,
                unique_mf0=True)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spec_fn, spec_thumb, spec_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train)

        # Without a learned voicing output, tune the voicing threshold on
        # every full-sized validation dataset.
        if not args.voicing:
            for vd in validation_datasets:
                if not vd.name.startswith("small_"):
                    vd.hooks.append(AdjustVoicingHook())

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spec_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    """Build a multi-instrument recognition network plus its datasets.

    Optionally uses the Yun-Ning Hung CQT spectrogram front end, maps
    MusicNet MIDI programs to output-tensor positions, and derives
    per-class loss weights from the training-set note distribution.
    """
    network = NetworkMultiInstrumentRecognition(args)

    with network.session.graph.as_default():
        if args.spectrogram == "YunNingHung_cqt":
            HOP_LENGTH = args.frame_width
            FMIN = 27.5
            BINS_PER_OCTAVE = 12
            N_BINS = 88
            top_db = args.spectrogram_top_db
            filter_scale = args.spectrogram_filter_scale

            def spectrogram_function(audio, samplerate):
                # NOTE(review): min/max prints look like debug output left in.
                print(np.min(audio), np.max(audio))
                cqt = librosa.cqt(audio, sr=samplerate, hop_length=HOP_LENGTH,
                                  fmin=FMIN, n_bins=N_BINS,
                                  bins_per_octave=BINS_PER_OCTAVE,
                                  filter_scale=filter_scale)
                print(np.min(cqt), np.max(cqt))
                # Map dB magnitudes into [0, 1], then quantize to uint16.
                log_cqt = (librosa.core.amplitude_to_db(
                    np.abs(cqt), ref=np.max, top_db=top_db) / top_db) + 1.0
                log_cqt = np.expand_dims(log_cqt, 0)
                return (log_cqt * 65535).astype(np.uint16)

            spectrogram_thumb = "YunNingHung_cqt-fmin{}-oct{}-octbins{}-hop{}-db{}-fs{}-uint16".format(
                FMIN, N_BINS / BINS_PER_OCTAVE, BINS_PER_OCTAVE, HOP_LENGTH,
                top_db, filter_scale)
            spectrogram_info = (1, N_BINS, HOP_LENGTH, FMIN)
        else:
            spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(
                args)

        # Record the spectrogram variant among the hyperparameters.
        args.spectrogram_thumb = spectrogram_thumb

        # All instruments appearing in MusicNet: MIDI program number ->
        # position in the output probability tensor.
        instrument_mappings = {
            1:  {"id": 0,  "instrument": "piano"},
            7:  {"id": 1,  "instrument": "harpsichord"},
            41: {"id": 2,  "instrument": "violin"},
            42: {"id": 3,  "instrument": "viola"},
            43: {"id": 4,  "instrument": "cello"},
            44: {"id": 5,  "instrument": "contrabass"},
            61: {"id": 6,  "instrument": "french horn"},
            69: {"id": 7,  "instrument": "oboe"},
            71: {"id": 8,  "instrument": "bassoon"},
            72: {"id": 9,  "instrument": "clarinet"},
            74: {"id": 10, "instrument": "flute"},
        }

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            # Only MusicNet MIR entries get an annotation here; other uids
            # keep whatever annotation they already carry.
            if uid.startswith("musicnet_mir"):
                aa.annotation = datasets.Annotation.from_musicnet_csv(
                    annot_path, uid,
                    hop_samples=args.frame_width * args.samplerate / 44100,
                    unique_mf0=True,
                    instrument_mappings=instrument_mappings)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        small_hooks = [
            MetricsHook_mir(instrument_mappings),
            VisualOutputHook_mir(),
        ]
        valid_hooks = [
            AdjustVoicingHook_mir(),
            MetricsHook_mir(instrument_mappings),
            SaveBestModelHook(args.logdir, "micro f1"),
            BatchOutputWriterHook_mir(split="valid", output_reference=True),
        ]
        test_hooks = [
            MetricsHook_mir(instrument_mappings, write_summaries=True,
                            print_detailed=True, split="test"),
            BatchOutputWriterHook_mir(output_reference=True),
        ]
        if args.save_salience:
            test_hooks.append(SaveSaliencesHook())

        print("preparing datasets...")
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train, small_hooks_mf0=small_hooks,
            valid_hooks=valid_hooks, test_hooks=test_hooks)
        print("done preparing datasets")

        # Per-class weights from the training-set note histogram: rarer
        # classes get larger weights, softened by the 0.3 exponent.
        # NOTE(review): a class with zero occurrences yields a zero prior
        # and a division by zero here — confirm every class is represented.
        notes_count = np.zeros((args.note_range, ))
        for note_frame in train_dataset.all_notes():
            for note in note_frame:
                notes_count[int(note)] += 1
        class_priors = notes_count / np.sum(notes_count)
        mean_prior = 1 / args.note_range
        class_weights = mean_prior / class_priors * (1 - class_priors) / (
            1 - mean_prior)
        class_weights = class_weights**0.3
        print("weights", class_weights)

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spectrogram_info,
                          class_weights=class_weights)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    """Build a multi-f0 network plus its train/validation/test datasets.

    MAPS entries are annotated from MIDI files, everything else from
    time-series annotations; MedleyDB uids get a ``_mel4`` suffix.
    """
    network = NetworkMultif0(args)

    with network.session.graph.as_default():
        spec_fn, spec_thumb, spec_info = common.spectrograms(args)
        # Record the spectrogram variant among the hyperparameters.
        args.spectrogram_thumb = spec_thumb

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            # MedleyDB entries are cached under a melody-4 specific uid.
            if uid.startswith("mdb_"):
                uid = uid + "_mel4"
            hop = args.frame_width * args.samplerate / 44100
            if uid.startswith("maps_"):
                # MAPS annotations ship as MIDI.
                aa.annotation = datasets.Annotation.from_midi(
                    annot_path, uid, hop_samples=hop, unique_mf0=True)
            else:
                aa.annotation = datasets.Annotation.from_time_series(
                    annot_path, uid, hop_samples=hop, unique_mf0=True)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spec_fn, spec_thumb, spec_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        small_hooks = [MetricsHook_mf0(), VisualOutputHook_mf0()]
        valid_hooks = [
            MetricsHook_mf0(),
            SaveBestModelHook(args.logdir, "Accuracy"),
        ]
        test_hooks = [
            MetricsHook_mf0(write_summaries=True, print_detailed=False,
                            split="test"),
            CSVBatchOutputWriterHook_mf0(output_reference=True),
        ]

        print("preparing datasets...")
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train, small_hooks_mf0=small_hooks,
            valid_hooks=valid_hooks, test_hooks=test_hooks)
        print("done preparing datasets")

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spec_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    """Build a multi-f0 network with a Kelz-style spectrogram front end.

    ``args.spectrogram`` selects between the madmom-based "kelz" filterbank
    (see https://github.com/rainerkelz/framewise_2016), a CQT variant
    ("cqt_kelz"), or the common spectrogram factory.  Spectrogram selection
    happens before entering the network's graph context, which wraps only
    dataset preparation and model construction.
    """
    network = NetworkMultif0(args)

    HOP_LENGTH = args.frame_width

    if args.spectrogram == "kelz":
        # Logarithmic filterbank front end as in Kelz et al. (2016).
        FMIN = 30
        FMAX = 8000
        NUMBANDS = 48

        def spectrogram_function(audio, samplerate):
            audio_options = dict(num_channels=1,
                                 sample_rate=samplerate,
                                 filterbank=LogarithmicFilterbank,
                                 frame_size=4096,
                                 fft_size=4096,
                                 hop_size=HOP_LENGTH,
                                 num_bands=NUMBANDS,
                                 fmin=FMIN,
                                 fmax=FMAX,
                                 fref=440.0,
                                 norm_filters=True,
                                 unique_filters=True,
                                 circular_shift=False,
                                 norm=True)
            spec = LogarithmicFilteredSpectrogram(audio, **audio_options)
            # Transpose to (bins, frames), normalize to [0, 1], add a
            # leading channel axis, and quantize to uint16.
            spec = spec.T
            spec = spec / np.max(spec)
            spec = np.expand_dims(spec, 0)
            return (np.array(spec) * 65535).astype(np.uint16)

        N_BINS = 229
        spectrogram_thumb = "kelz-fmin{}-fmax{}-bands{}-hop{}-uint16".format(
            FMIN, FMAX, NUMBANDS, HOP_LENGTH)
        spectrogram_info = (1, N_BINS, HOP_LENGTH, FMIN)

    elif args.spectrogram == "cqt_kelz":
        # CQT front end covering 5 octaves at 48 bins per octave.
        FMIN = 32.7
        BINS_PER_OCTAVE = 48
        N_BINS = BINS_PER_OCTAVE * 5
        top_db = 110
        filter_scale = 1.0

        def spectrogram_function(audio, samplerate):
            cqt = librosa.cqt(audio, sr=samplerate, hop_length=HOP_LENGTH,
                              fmin=FMIN, n_bins=N_BINS,
                              bins_per_octave=BINS_PER_OCTAVE,
                              filter_scale=filter_scale)
            # Map dB magnitudes into [0, 1], then quantize to uint16.
            log_cqt = (librosa.core.amplitude_to_db(
                np.abs(cqt), ref=np.max, top_db=top_db) / top_db) + 1.0
            log_cqt = np.expand_dims(log_cqt, 0)
            return (log_cqt * 65535).astype(np.uint16)

        spectrogram_thumb = "cqt-fmin{}-oct{}-octbins{}-hop{}-db{}-fs{}-uint16".format(
            FMIN, N_BINS / BINS_PER_OCTAVE, BINS_PER_OCTAVE, HOP_LENGTH,
            top_db, filter_scale)
        spectrogram_info = (1, N_BINS, HOP_LENGTH, FMIN)

    else:
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(
            args)

    # Record the spectrogram variant among the hyperparameters.
    args.spectrogram_thumb = spectrogram_thumb

    with network.session.graph.as_default():

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            # MedleyDB entries are cached under a melody-4 specific uid.
            if uid.startswith("mdb_"):
                uid = uid + "_mel4"
            hop = args.frame_width * args.samplerate / 44100
            if uid.startswith("maps_"):
                # MAPS annotations ship as MIDI.
                aa.annotation = datasets.Annotation.from_midi(
                    annot_path, uid, hop_samples=hop, unique_mf0=True)
            else:
                aa.annotation = datasets.Annotation.from_time_series(
                    annot_path, uid, hop_samples=hop, unique_mf0=True)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            # Evaluation pipeline: map -> batch -> prefetch.
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            # Training pipeline additionally shuffles examples first.
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        small_hooks = [MetricsHook_mf0(), VisualOutputHook_mf0()]
        valid_hooks = [
            MetricsHook_mf0(),
            SaveBestModelHook(args.logdir, "Accuracy"),
        ]
        test_hooks = [
            MetricsHook_mf0(write_summaries=True, print_detailed=False,
                            split="test"),
            CSVBatchOutputWriterHook_mf0(output_reference=True),
        ]

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train, small_hooks_mf0=small_hooks,
            valid_hooks=valid_hooks, test_hooks=test_hooks)

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets