Example #1
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(args)
        # save spectrogram_thumb to hyperparams
        args.spectrogram_thumb = spectrogram_thumb

        hop_samples = args.frame_width*args.samplerate/44100
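        # e.g. frame_width=512 at samplerate=44100 gives hop_samples == 512;
        # at samplerate=16000 it is ~185.76 (the hop is scaled from 44.1 kHz).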
        print("hop_samples", hop_samples)
        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation, hop_samples=hop_samples)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(dataset.prepare_example, num_parallel_calls=args.threads).batch(args.batch_size).prefetch(10)

        valid_hooks = [MetricsHook(), VisualOutputHook(False, False, True, True), SaveBestModelHook(args.logdir), CSVOutputWriterHook(), AdjustVoicingHook()]
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(args.datasets, args, preload_fn, dataset_transform, dataset_transform_train, valid_hooks=valid_hooks)

        network.construct(args, create_model, train_dataset.dataset.output_types, train_dataset.dataset.output_shapes, spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
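
The dataset_transform helpers above all follow the same tf.data pattern: map raw examples in parallel, batch them, and prefetch so the model never waits on input. A minimal self-contained sketch of that pattern, with a toy parse_fn standing in for dataset.prepare_example and literal constants standing in for the args values:

import tensorflow as tf

def parse_fn(x):
    # Toy stand-in for dataset.prepare_example.
    return tf.cast(x, tf.float32) / 255.0

raw = tf.data.Dataset.range(100)
pipeline = (raw
            .map(parse_fn, num_parallel_calls=4)  # parallel preprocessing
            .batch(32)                            # cf. args.batch_size_evaluation
            .prefetch(10))                        # overlap input with compute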
Example #2
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(
            args)

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(
                *aa.annotation, hop_samples=args.frame_width * args.samplerate / 44100)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train)

        network.construct(args,
                          create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
Example #3
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        def spec_function(audio, samplerate):
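            # Compute one CQT per harmonic multiple of FMIN; the stacked
            # result forms a harmonic CQT (HCQT).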

            cqt_list = []
            shapes = []
            for h in HARMONICS:
                cqt = librosa.cqt(
                    audio, sr=samplerate, hop_length=HOP_LENGTH, fmin=FMIN*float(h),
                    n_bins=N_BINS,
                    bins_per_octave=BINS_PER_OCTAVE
                )
                cqt_list.append(cqt)
                shapes.append(cqt.shape)

            shapes_equal = [s == shapes[0] for s in shapes]
            if not all(shapes_equal):
                # The per-harmonic CQTs can differ slightly in length;
                # trim them to the shortest time axis so they stack cleanly.
                print("CQT shapes are not equal:", shapes_equal)
                min_time = np.min([s[1] for s in shapes])
                cqt_list = [cqt[:, :min_time] for cqt in cqt_list]

            # Map the dB range [-80, 0] onto [0, 1] and quantize to uint16.
            log_hcqt = ((1.0/80.0) * librosa.core.amplitude_to_db(
                np.abs(np.array(cqt_list)), ref=np.max)) + 1.0

            return (log_hcqt*65535).astype(np.uint16)

            # cqt = librosa.core.cqt(audio, samplerate, hop_length=HOP_LENGTH, fmin=FMIN, n_bins=N_BINS, bins_per_octave=BINS_PER_OCTAVE)
            # # log scaling
            # cqt = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)
            # # uint8 compression
            # cqt = ((cqt/80+1)*255).astype(np.uint8)
            # return cqt

        spectrogram_thumb = "hcqt-fmin{}-oct{}-octbins{}-hop{}-db-uint16".format(FMIN, N_BINS/BINS_PER_OCTAVE, BINS_PER_OCTAVE, HOP_LENGTH)

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation, hop_samples=512)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(spec_function, spectrogram_thumb, HOP_LENGTH)

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(dataset.prepare_example, num_parallel_calls=args.threads).batch(args.batch_size).prefetch(10)

        valid_hooks = [MetricsHook(write_estimations=True), VisualOutputHook(False, False, True, True), SaveBestModelHook(args.logdir), AdjustVoicingHook()]
        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(args.datasets, args, preload_fn, dataset_transform, dataset_transform_train, valid_hooks=valid_hooks)

        network.construct(args, create_model, train_dataset.dataset.output_types, train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
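
Because amplitude_to_db(..., ref=np.max) clips to [-80, 0] dB (librosa's default top_db=80), log_hcqt lies in [0, 1] and the uint16 quantization error is at most 80/65535 ≈ 0.0012 dB. A minimal sketch of the inverse transform, recovering dB values from a spectrogram stored by spec_function above:

import numpy as np

def decode_hcqt(stored):
    # Invert the uint16 compression from spec_function:
    # uint16 -> [0, 1] -> dB in [-80, 0].
    log_hcqt = stored.astype(np.float32) / 65535.0
    return (log_hcqt - 1.0) * 80.0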
Example #4
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        spectrogram_function, spectrogram_thumb, spectrogram_info = common.spectrograms(
            args)
        # save spectrogram_thumb to hyperparams
        args.spectrogram_thumb = spectrogram_thumb

        def preload_fn(aa):
            annot_path, uid = aa.annotation
            if uid.startswith("mdb_"):
                uid = uid + "_mel4"
            aa.annotation = datasets.Annotation.from_time_series(
                annot_path,
                uid,
                hop_samples=args.frame_width * args.samplerate / 44100,
                unique_mf0=True)
            aa.audio.load_resampled_audio(args.samplerate).load_spectrogram(
                spectrogram_function, spectrogram_thumb, spectrogram_info[2])

        def dataset_transform(tf_dataset, dataset):
            return (tf_dataset
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))

        def dataset_transform_train(tf_dataset, dataset):
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=args.threads)
                    .batch(args.batch_size)
                    .prefetch(10))

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train)

        if not args.voicing:
            for vd in validation_datasets:
                if not vd.name.startswith("small_"):
                    vd.hooks.append(AdjustVoicingHook())

        network.construct(args,
                          create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes,
                          spectrogram_info=spectrogram_info)

    return network, train_dataset, validation_datasets, test_datasets
Example #5
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():
        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(*aa.annotation)
            aa.audio.load_resampled_audio(args.samplerate)

        def dataset_transform(tf_dataset, dataset):
            return tf_dataset.map(dataset.prepare_example, num_parallel_calls=args.threads).batch(args.batch_size_evaluation).prefetch(10)

        def dataset_transform_train(tf_dataset, dataset):
            return tf_dataset.shuffle(10**5).map(dataset.prepare_example, num_parallel_calls=args.threads).filter(dataset.is_example_voiced).batch(args.batch_size).prefetch(10)

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(args.datasets, args, preload_fn, dataset_transform, dataset_transform_train)

        network.construct(args, create_model, train_dataset.dataset.output_types, train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
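
This example's training transform adds a filter stage that drops unvoiced examples before batching. A self-contained sketch of the shuffle -> map -> filter -> batch -> prefetch pattern, with a toy predicate standing in for dataset.is_example_voiced:

import tensorflow as tf

def is_voiced(example):
    # Toy stand-in for dataset.is_example_voiced.
    return example > 0.0

raw = tf.data.Dataset.range(1000)
train_pipeline = (raw
                  .shuffle(10**5)                # large shuffle buffer
                  .map(lambda x: tf.cast(x, tf.float32),
                       num_parallel_calls=4)
                  .filter(is_voiced)             # keep voiced examples only
                  .batch(32)                     # cf. args.batch_size
                  .prefetch(10))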
Example #6
def construct(args):
    network = NetworkMelody(args)

    with network.session.graph.as_default():

        def preload_fn(aa):
            aa.annotation = datasets.Annotation.from_time_series(
                *aa.annotation)
            aa.audio.load_resampled_audio(args.samplerate)

        # augment_audio_basa = datasets.Audio("/mnt/tera/jirka/V1/MatthewEntwistle_FairerHopes/MatthewEntwistle_FairerHopes_STEMS/MatthewEntwistle_FairerHopes_STEM_07.wav",
        #                                     "augment_low").load_resampled_audio(args.samplerate).slice(20, 30)
        # augment_audio_perkuse = datasets.Audio("/mnt/tera/jirka/V1/MatthewEntwistle_FairerHopes/MatthewEntwistle_FairerHopes_STEMS/MatthewEntwistle_FairerHopes_STEM_08.wav",
        #                                        "augment_low").load_resampled_audio(args.samplerate).slice(20, 30)

        # augment_audio = augment_audio_basa.samples*10 + augment_audio_perkuse.samples*10

        def dataset_transform(tf_dataset, dataset):
            return (tf_dataset
                    .map(dataset.prepare_example)
                    .batch(args.batch_size_evaluation)
                    .prefetch(10))
            # return tf_dataset.map(dataset.prepare_example).map(dataset.mix_example_with(augment_audio)).batch(args.batch_size_evaluation).prefetch(1)

        def dataset_transform_train(tf_dataset, dataset):
            return (tf_dataset
                    .shuffle(10**5)
                    .map(dataset.prepare_example, num_parallel_calls=4)
                    .filter(dataset.is_example_voiced)
                    .batch(args.batch_size)
                    .prefetch(10))

        train_dataset, test_datasets, validation_datasets = common.prepare_datasets(
            args.datasets, args, preload_fn, dataset_transform,
            dataset_transform_train)

        # Add the voicing-adjustment hook to the full validation datasets
        # (the small_ debugging subsets are skipped)
        for vd in validation_datasets:
            if not vd.name.startswith("small_"):
                vd.hooks.append(AdjustVoicingHook())

        network.construct(args, create_model,
                          train_dataset.dataset.output_types,
                          train_dataset.dataset.output_shapes)

    return network, train_dataset, validation_datasets, test_datasets
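
The commented-out lines in this example hint at an augmentation that mixes amplified bass and percussion stems into each example via dataset.mix_example_with. A hypothetical standalone version of that idea in NumPy; the function name and gain are illustrative, not the repository's API:

import numpy as np

def mix_with(samples, augment, gain=10.0):
    # Tile the interference stem to the input length and add it in, mirroring
    # augment_audio_basa.samples*10 + augment_audio_perkuse.samples*10 above.
    reps = int(np.ceil(len(samples) / len(augment)))
    return samples + gain * np.tile(augment, reps)[:len(samples)]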