# NOTE: these construct() variants come from separate experiment scripts in the
# same repository; each relies on its module's imports (common, datasets,
# numpy/librosa, the hook classes) and on a create_model defined alongside it.
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(args)
        # save spectrogram_thumb to hyperparams
        args.spectrogram_thumb = spectrogram_thumb

        # rescale the annotation hop from the reference 44.1 kHz rate to the working samplerate
        hop_samples = args.frame_width * args.samplerate / 44100
        print("hop_samples", hop_samples)

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation, hop_samples=hop_samples)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size).prefetch(10)

        valid_hooks = [MetricsHook(), VisualOutputHook(False, False, True, True),
                       SaveBestModelHook(args.logdir), CSVOutputWriterHook(), AdjustVoicingHook()]
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train,
            valid_hooks=valid_hooks)

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes, spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
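# The hop_samples computation above rescales an annotation hop that is defined
# at the reference 44.1 kHz rate to the working samplerate. A minimal worked
# example, assuming the illustrative values frame_width=512 and
# samplerate=16000 (neither value is fixed by the code above):
frame_width = 512                               # assumed annotation hop at 44.1 kHz
samplerate = 16000                              # assumed working samplerate
hop_samples = frame_width * samplerate / 44100
print(hop_samples)                              # 185.75963718820862 (kept fractional)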
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(args)

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(
                *aa.annotation, args.frame_width * args.samplerate / 44100)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size).prefetch(10)

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train)

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes, spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        def spec_function(audio, samplerate):
            # compute one CQT per harmonic and stack them into an HCQT
            cqt_list = []
            shapes = []
            for h in HARMONICS:
                cqt = librosa.cqt(
                    audio, sr=samplerate, hop_length=HOP_LENGTH,
                    fmin=FMIN*float(h), n_bins=N_BINS,
                    bins_per_octave=BINS_PER_OCTAVE
                )
                cqt_list.append(cqt)
                shapes.append(cqt.shape)

            # trim every harmonic to the shortest time axis if the shapes disagree
            shapes_equal = [s == shapes[0] for s in shapes]
            if not all(shapes_equal):
                print("NOT ALL", shapes_equal)
                min_time = np.min([s[1] for s in shapes])
                new_cqt_list = []
                for i in range(len(cqt_list)):
                    new_cqt_list.append(cqt_list[i][:, :min_time])
                cqt_list = new_cqt_list

            # log-amplitude scaling to [0, 1], then uint16 compression
            log_hcqt = ((1.0/80.0) * librosa.core.amplitude_to_db(
                np.abs(np.array(cqt_list)), ref=np.max)) + 1.0

            return (log_hcqt*65535).astype(np.uint16)

            # cqt = librosa.core.cqt(audio, samplerate, hop_length=HOP_LENGTH, fmin=FMIN, n_bins=N_BINS, bins_per_octave=BINS_PER_OCTAVE)
            # # log scaling
            # cqt = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)
            # # uint8 compression
            # cqt = ((cqt/80+1)*255).astype(np.uint8)
            # return cqt

        spectrogram_thumb = "hcqt-fmin{}-oct{}-octbins{}-hop{}-db-uint16".format(
            FMIN, N_BINS/BINS_PER_OCTAVE, BINS_PER_OCTAVE, HOP_LENGTH)

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation, 512)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spec_function, spectrogram_thumb, HOP_LENGTH)

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size).prefetch(10)

        valid_hooks = [MetricsHook(write_estimations=True), VisualOutputHook(False, False, True, True),
                       SaveBestModelHook(args.logdir), AdjustVoicingHook()]
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train,
            valid_hooks=valid_hooks)

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
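# spec_function above depends on module-level constants (HARMONICS, FMIN,
# HOP_LENGTH, N_BINS, BINS_PER_OCTAVE) defined elsewhere in the source file.
# A plausible configuration, following the common HCQT setup of Bittner et
# al.'s deep salience representations -- these exact values are an assumption,
# not taken from the code above:
HARMONICS = [0.5, 1, 2, 3, 4, 5]   # one sub-harmonic plus the first five harmonics
FMIN = 32.7                        # C1 in Hz
HOP_LENGTH = 512                   # consistent with the 512-sample hop in preload_fn
BINS_PER_OCTAVE = 60               # 5 bins per semitone
N_BINS = 6 * BINS_PER_OCTAVE       # 6 octaves -> 360 bins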
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(args)
        # save spectrogram_thumb to hyperparams
        args.spectrogram_thumb = spectrogram_thumb

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            # MedleyDB tracks use a dedicated annotation variant (uid suffix "_mel4")
            if uid.startswith("mdb_"):
                uid = uid + "_mel4"
            aa.annotation = datasets.Annotation.from_time_series(
                annot_path, uid,
                hop_samples=args.frame_width * args.samplerate / 44100,
                unique_mf0=True)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size).prefetch(10)

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train)

        # when voicing is not trained directly, adjust it on the validation data
        if not args.voicing:
            for vd in validation_datasets:
                if not vd.name.startswith("small_"):
                    vd.hooks.append(AdjustVoicingHook())

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes, spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation)
            aa.audio.load_resampled_audio(args.samplerate)

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(
                args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            # drop unvoiced examples from the training stream
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=args.threads).filter(
                dataset.is_example_voiced).batch(args.batch_size).prefetch(10)

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train)

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation)
            aa.audio.load_resampled_audio(args.samplerate)

        # augment_audio_basa = datasets.Audio("/mnt/tera/jirka/V1/MatthewEntwistle_FairerHopes/MatthewEntwistle_FairerHopes_STEMS/MatthewEntwistle_FairerHopes_STEM_07.wav",
        #                                     "augment_low").load_resampled_audio(args.samplerate).slice(20, 30)
        # augment_audio_perkuse = datasets.Audio("/mnt/tera/jirka/V1/MatthewEntwistle_FairerHopes/MatthewEntwistle_FairerHopes_STEMS/MatthewEntwistle_FairerHopes_STEM_08.wav",
        #                                        "augment_low").load_resampled_audio(args.samplerate).slice(20, 30)
        # augment_audio = augment_audio_basa.samples*10 + augment_audio_perkuse.samples*10

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example).batch(
                args.batch_size_evaluation).prefetch(10)
            # return tf_dataset.map(dataset.prepare_example).map(dataset.mix_example_with(augment_audio)).batch(args.batch_size_evaluation).prefetch(1)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(
                dataset.prepare_example, num_parallel_calls=4).filter(
                dataset.is_example_voiced).batch(args.batch_size).prefetch(10)

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform, dataset_transform_train)

        # Add the voicing hook to the validation datasets
        for vd in validation_datasets:
            if not vd.name.startswith("small_"):
                vd.hooks.append(AdjustVoicingHook())

        network.construct(args, create_model, train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
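# All six variants share the same tf.data pipeline shape: shuffle, map raw
# examples through dataset.prepare_example, optionally filter to voiced
# frames, then batch and prefetch. A self-contained toy version of the
# training transform, with stand-in lambdas in place of the dataset methods:
import tensorflow as tf

toy = tf.data.Dataset.range(1000)
toy = (toy
       .shuffle(10**5)                               # large shuffle buffer, as above
       .map(lambda x: x * 2, num_parallel_calls=4)   # stand-in for prepare_example
       .filter(lambda x: x % 3 != 0)                 # stand-in for is_example_voiced
       .batch(32)
       .prefetch(10))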