Example #1
    def test_execute(self, ac_ext):

        sample_rate = 22050
        seg_duration = 0.1
        seg_overlap = 0.5

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        sf_pro = SegmentFeatureProcessor([ac_ext])
        sc_gen = SegmentContainerGenerator(TEST_AUDIO_PATH_TUPLE_1[0],
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        sc_list = [sc for sc in sc_gen.execute()]

        id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
        id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

        first_seg = sc_list[0].segments[0]
        first_seg_ref = id0132_data[:int(seg_duration * sample_rate)]
        last_seg = sc_list[-1].segments[-1]
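        # advance start_time by seg_duration * seg_overlap until the segment
        # starting there would reach past the end of the file, then step back
        # once to get the start time of the last full segment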
        start_time = 0.0
        while start_time + seg_duration < len(id1238_data) / sample_rate:
            start_time += seg_duration * seg_overlap
        start_time -= seg_duration * seg_overlap
        last_seg_ref = \
            id1238_data[int(start_time*sample_rate):int((start_time+seg_duration)*sample_rate)]

        assert (len(sc_list) == 4
                and np.all(first_seg_ref == first_seg.features["audio_chunk"])
                and np.all(last_seg_ref == last_seg.features["audio_chunk"]))
Example #2
    def test_init(self):
        try:
            parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                        label_file=TEST_LABEL_PATH)
            sf_pro = SegmentFeatureProcessor([])
            SegmentContainerGenerator("fake_audio_root",
                                      sf_pro,
                                      label_parser=parser)
        except Exception as e:
            pytest.fail("Unexpected Error: {}".format(e))
Example #3
    def test_data_multiple_features(self):
        """
        Compare data from "manual" constructor to config file-based constructor.
        """

        # construct minibatch generator "manually"

        sample_rate = 22050
        win_size = 256
        hop_size = 128
        energy_threshold = 0.2
        spectral_flatness_threshold = 0.3
        seg_duration = 0.2
        seg_overlap = 0.5

        batch_size = 50
        num_features = 64
        n_time_bins = 34

        af_gen = AudioFrameGen(sample_rate=sample_rate,
                               win_size=win_size,
                               hop_size=hop_size)

        en_ext = EnergyExtractor()
        sf_ext = SpectralFlatnessExtractor()
        mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                       fft_size=win_size,
                                       n_mels=64,
                                       min_freq=0,
                                       max_freq=sample_rate / 2)
        ff_pro = FrameFeatureProcessor(af_gen, [en_ext, sf_ext, mel_ext],
                                       feature_container_root=FEATURE_ROOT)

        ffc_ext = FrameFeatureChunkExtractor("mel_spectrum")
        act_det = Simple(
            energy_threshold=energy_threshold,
            spectral_flatness_threshold=spectral_flatness_threshold)
        sf_pro = SegmentFeatureProcessor([act_det, ffc_ext],
                                         ff_pro=ff_pro,
                                         audio_root=DATA_PATH)

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        sc_gen = SegmentContainerGenerator(DATA_PATH,
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        mb_gen_1 = MiniBatchGen(
            sc_gen, batch_size, {
                "mel_spectrum": {
                    "feature_size": num_features,
                    "n_time_bins": n_time_bins
                }
            }, 0)

        # parse json file
        with open(CONFIG_PATH) as config_file:
            config = json.loads(config_file.read())

        config["features"] = [{
            'name': 'audio_chunk',
            'config': {}
        }, {
            'name': 'mel_spectrum',
            'config': {
                'n_mels': 64,
                'min_freq': 0,
                'max_freq': 11025,
                'log_amp': 1
            }
        }]

        # construct minibatch generator from config
        mb_gen_dict = MiniBatchGen.from_config(config)
        mb_gen_2 = mb_gen_dict["default"]

        # execute and compare
        try:
            mb_gen_1_e = mb_gen_1.execute(active_segments_only=True,
                                          with_targets=True,
                                          with_filenames=False)

            mb_gen_2_e = mb_gen_2.execute(active_segments_only=True,
                                          with_targets=True,
                                          with_filenames=False)

            if not os.path.exists(FEATURE_ROOT):
                os.makedirs(FEATURE_ROOT)

            for mb1, mb2 in zip(mb_gen_1_e, mb_gen_2_e):
                assert np.all(mb1[0]["mel_spectrum"] == mb2[0]["mel_spectrum"])
                assert np.all(mb1[1] == mb2[1])

        except Exception as e:
            pytest.fail("Unexpected Error: {}".format(e))

        finally:
            shutil.rmtree(FEATURE_ROOT)
Example #4
    def test_audio_data(self):
        """
        Compare data from "manual" constructor to config file-based constructor,
        with raw audio feature.
        """

        # construct minibatch generator "manually"

        sample_rate = 22050
        seg_duration = 0.2
        seg_overlap = 0.5

        batch_size = 50
        num_features = 1
        n_time_bins = 4410

        ac_ext = AudioChunkExtractor(DATA_PATH, sample_rate)
        sf_pro = SegmentFeatureProcessor([ac_ext],
                                         ff_pro=None,
                                         audio_root=DATA_PATH)

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        sc_gen = SegmentContainerGenerator(DATA_PATH,
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        mb_gen_1 = MiniBatchGen(
            sc_gen, batch_size, {
                "audio_chunk": {
                    "feature_size": num_features,
                    "n_time_bins": n_time_bins
                }
            }, 0)

        # parse json file
        with open(CONFIG_PATH) as config_file:
            config = json.loads(config_file.read())

        # replace the features with audio_chunk only and remove the activity detection
        config["features"] = [{"name": "audio_chunk", "config": {}}]
        config.pop("activity_detection")

        # construct minibatch generator from config
        mb_gen_dict = MiniBatchGen.from_config(config)
        mb_gen_2 = mb_gen_dict["default"]

        # execute and compare
        mb_gen_1_e = mb_gen_1.execute(active_segments_only=False,
                                      with_targets=True,
                                      with_filenames=True)

        mb_gen_2_e = mb_gen_2.execute(active_segments_only=False,
                                      with_targets=True,
                                      with_filenames=True)

        for mb1, mb2 in zip(mb_gen_1_e, mb_gen_2_e):
            assert np.all(mb1[0]["audio_chunk"] == mb2[0]["audio_chunk"])
            assert np.all(mb1[1] == mb2[1])
            assert np.all(mb1[2] == mb2[2])
Example #5
    def test_gen_minibatches_2d_w_pca_scaler(self):

        sample_rate = 22050
        win_size = 256
        hop_size = 128
        energy_threshold = 0.2
        spectral_flatness_threshold = 0.3
        seg_duration = 0.1
        seg_overlap = 0.5

        batch_size = 10
        num_features = 16
        n_time_bins = 17

        af_gen = AudioFrameGen(sample_rate=sample_rate,
                               win_size=win_size,
                               hop_size=hop_size)

        en_ext = EnergyExtractor()
        sf_ext = SpectralFlatnessExtractor()
        mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                       fft_size=win_size,
                                       n_mels=64,
                                       min_freq=0,
                                       max_freq=sample_rate / 2)
        ff_pro = FrameFeatureProcessor(af_gen, [en_ext, sf_ext, mel_ext],
                                       feature_container_root=FEATURE_ROOT)

        pca = joblib.load(
            os.path.join(DATA_PATH, "transform/mel64_pca16_norm/pca.jl"))
        scaler = joblib.load(
            os.path.join(DATA_PATH, "transform/mel64_pca16_norm/scaler.jl"))

        ffc_ext = FrameFeatureChunkExtractor("mel_spectrum", pca, scaler)
        act_det = Simple(
            energy_threshold=energy_threshold,
            spectral_flatness_threshold=spectral_flatness_threshold)
        sf_pro = SegmentFeatureProcessor([act_det, ffc_ext],
                                         ff_pro=ff_pro,
                                         audio_root=DATA_PATH)

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        sc_gen = SegmentContainerGenerator(DATA_PATH,
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        sc_gen_e = sc_gen.execute()

        active_segments = []

        # compare data in segment and corresponding data in feature container
        for sc in sc_gen_e:
            fc_path = os.path.join(FEATURE_ROOT,
                                   sc.audio_path.replace(".wav", ".fc.jl"))
            fc = feature_container.FeatureContainer.load(fc_path)
            for s in sc.segments:
                if hasattr(s, 'activity') and s.activity:
                    start_ind = fc.time_to_frame_ind(s.start_time)
                    end_ind = start_ind + n_time_bins
                    data = scaler.transform(
                        pca.transform(fc.features["mel_spectrum"]["data"]
                                      [start_ind:end_ind]))
                    assert np.all(data == s.features["mel_spectrum"])
                    active_segments.append(s)

        # compare data in segment and corresponding data in minibatches

        mb_gen = MiniBatchGen(
            sc_gen, batch_size, {
                "mel_spectrum": {
                    "feature_size": num_features,
                    "n_time_bins": n_time_bins
                }
            }, 0)

        mb_gen_e = mb_gen.execute(active_segments_only=True,
                                  with_targets=False,
                                  with_filenames=False)

        count = 0
        for mb, in mb_gen_e:
            for data in mb["mel_spectrum"]:
                assert np.all(data[0].T ==
                              active_segments[count].features["mel_spectrum"])
                count += 1
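
The pca.jl and scaler.jl files loaded in the test above are pre-fitted transformers serialized with joblib. A minimal sketch of how such artifacts could be produced with scikit-learn (the training matrix and output paths are illustrative assumptions, not part of the test data):

import joblib
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Hypothetical matrix of 64-dimensional mel-spectrum frames used for fitting.
mel_frames = np.random.rand(10000, 64)

# Reduce 64 mel bands to 16 components, then standardize the projected data,
# matching the transform order applied in the test (pca first, then scaler).
pca = PCA(n_components=16).fit(mel_frames)
scaler = StandardScaler().fit(pca.transform(mel_frames))

joblib.dump(pca, "pca.jl")
joblib.dump(scaler, "scaler.jl")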
Example #6
    def test_gen_minibatches_multiple_features_1(self, ac_ext):
        sample_rate = 22050
        win_size = 256
        hop_size = 128
        seg_duration = 0.1
        seg_overlap = 0.5
        seg_size = int(seg_duration * sample_rate)

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        classes = parser.get_labels()

        n_epochs = 1
        batch_size = 10
        num_features_mel = 64
        n_time_bins_mel = 17
        num_features_audio = 1
        n_time_bins_audio = 2205

        af_gen = AudioFrameGen(sample_rate=sample_rate,
                               win_size=win_size,
                               hop_size=hop_size)

        mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                       fft_size=win_size,
                                       n_mels=num_features_mel,
                                       min_freq=0,
                                       max_freq=sample_rate / 2)
        ff_pro = FrameFeatureProcessor(af_gen, [mel_ext])

        pca = None
        scaler = None

        ffc_ext = FrameFeatureChunkExtractor("mel_spectrum", pca, scaler)

        sf_pro = SegmentFeatureProcessor([ac_ext, ffc_ext],
                                         ff_pro=ff_pro,
                                         audio_root=DATA_PATH)

        sc_gen = SegmentContainerGenerator(DATA_PATH,
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
        id0133_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_2))
        id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_3))
        id1322_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

        chunk_size = sample_rate * seg_duration
        # -1 because the number of audio chunks and frame feature chunks differ by 1
        id0132_n_chunks = utils.get_n_overlapping_chunks(
            len(id0132_data), chunk_size, seg_overlap) - 1
        # -1 because the number of audio chunks and frame feature chunks differ by 1
        id0133_n_chunks = utils.get_n_overlapping_chunks(
            len(id0133_data), chunk_size, seg_overlap) - 1
        id1238_n_chunks = utils.get_n_overlapping_chunks(
            len(id1238_data), chunk_size, seg_overlap)
        id1322_n_chunks = utils.get_n_overlapping_chunks(
            len(id1322_data), chunk_size, seg_overlap)

        n_minibatches = (id0132_n_chunks + id0133_n_chunks + id1238_n_chunks +
                         id1322_n_chunks) // batch_size

        mb_gen = MiniBatchGen(
            sc_gen, batch_size, {
                "mel_spectrum": {
                    "feature_size": num_features_mel,
                    "n_time_bins": n_time_bins_mel
                },
                "audio_chunk": {
                    "feature_size": num_features_audio,
                    "n_time_bins": n_time_bins_audio
                }
            }, 0)

        for _ in range(n_epochs):
            mb_gen_e = mb_gen.execute(with_targets=True, with_filenames=True)
            count = 0
            start_time = 0.0
            chunk_count = 0
            is_dataset1, is_dataset2, is_dataset3 = [True, True, True]
            for data, targets, filenames in mb_gen_e:
                for d, t, f in zip(data["audio_chunk"], targets, filenames):
                    start_ind = int(start_time * sample_rate)
                    if chunk_count < id0132_n_chunks:
                        assert f == "dataset1/ID0132.wav"
                        assert t == 3
                        assert np.all(d == id0132_data[start_ind:start_ind +
                                                       seg_size])
                    elif chunk_count < id0132_n_chunks + id0133_n_chunks:
                        if is_dataset1:
                            is_dataset1 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset1/ID0133.wav"
                        assert t == 3
                        assert np.all(d == id0133_data[start_ind:start_ind +
                                                       seg_size])
                    elif chunk_count < id0132_n_chunks + id0133_n_chunks + id1238_n_chunks:
                        if is_dataset2:
                            is_dataset2 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset2/ID1238.wav"
                        assert t == -3
                        assert np.all(d == id1238_data[start_ind:start_ind +
                                                       seg_size])
                    else:
                        if is_dataset3:
                            is_dataset3 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset2/ID1322.wav"
                        assert t == -3
                        assert np.all(d == id1322_data[start_ind:start_ind +
                                                       seg_size])

                    start_time += (1 - seg_overlap) * seg_duration
                    chunk_count += 1
                count += 1

            assert count == n_minibatches
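
The expected chunk counts above come from utils.get_n_overlapping_chunks. A minimal sketch of the counting it presumably performs, assuming windows of chunk_size samples whose starts are spaced by chunk_size * (1 - overlap) (the formula is an assumption inferred from how the tests use the helper):

def n_overlapping_chunks(n_samples, chunk_size, overlap):
    # Number of full chunks that fit when consecutive chunks overlap by
    # `overlap`, i.e. the hop between chunk starts is chunk_size * (1 - overlap).
    hop = chunk_size * (1 - overlap)
    return int((n_samples - chunk_size) // hop) + 1

# Example: a 1 s file at 22050 Hz, 0.1 s chunks, 50% overlap -> 19 chunks.
print(n_overlapping_chunks(22050, 2205, 0.5))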
Example #7
    def test_gen_minibatches_1d(self, ac_ext):
        sample_rate = 22050
        seg_duration = 0.1
        seg_overlap = 0.5
        seg_size = int(seg_duration * sample_rate)

        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH,
                                    label_file=TEST_LABEL_PATH)
        classes = parser.get_labels()
        sf_pro = SegmentFeatureProcessor([ac_ext])
        sc_gen = SegmentContainerGenerator(DATA_PATH,
                                           sf_pro,
                                           label_parser=parser,
                                           seg_duration=seg_duration,
                                           seg_overlap=seg_overlap)

        id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
        id0133_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_2))
        id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_3))
        id1322_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

        n_epochs = 3
        batch_size = 10
        n_time_bins = int(seg_duration * sample_rate)

        chunk_size = sample_rate * seg_duration
        id0132_n_chunks = utils.get_n_overlapping_chunks(
            len(id0132_data), chunk_size, seg_overlap)
        id0133_n_chunks = utils.get_n_overlapping_chunks(
            len(id0133_data), chunk_size, seg_overlap)
        id1238_n_chunks = utils.get_n_overlapping_chunks(
            len(id1238_data), chunk_size, seg_overlap)
        id1322_n_chunks = utils.get_n_overlapping_chunks(
            len(id1322_data), chunk_size, seg_overlap)

        n_minibatches = (id0132_n_chunks + id0133_n_chunks + id1238_n_chunks +
                         id1322_n_chunks) // batch_size

        mb_gen = MiniBatchGen(
            sc_gen, batch_size,
            {"audio_chunk": {
                "feature_size": 1,
                "n_time_bins": n_time_bins
            }}, 0)

        for _ in range(n_epochs):
            mb_gen_e = mb_gen.execute(with_targets=True, with_filenames=True)
            count = 0
            start_time = 0.0
            chunk_count = 0
            is_dataset1, is_dataset2, is_dataset3 = [True, True, True]
            for data, targets, filenames in mb_gen_e:
                for d, t, f in zip(data["audio_chunk"], targets, filenames):
                    start_ind = int(start_time * sample_rate)
                    if chunk_count < id0132_n_chunks:
                        assert f == "dataset1/ID0132.wav"
                        assert t == 3
                        assert np.all(d == id0132_data[start_ind:start_ind +
                                                       seg_size])
                    elif chunk_count < id0132_n_chunks + id0133_n_chunks:
                        if is_dataset1:
                            is_dataset1 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset1/ID0133.wav"
                        assert t == 3
                        assert np.all(d == id0133_data[start_ind:start_ind +
                                                       seg_size])
                    elif chunk_count < id0132_n_chunks + id0133_n_chunks + id1238_n_chunks:
                        if is_dataset2:
                            is_dataset2 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset2/ID1238.wav"
                        assert t == -3
                        assert np.all(d == id1238_data[start_ind:start_ind +
                                                       seg_size])
                    else:
                        if is_dataset3:
                            is_dataset3 = False
                            start_time = 0.0
                            start_ind = 0

                        assert f == "dataset2/ID1322.wav"
                        assert t == -3
                        assert np.all(d == id1322_data[start_ind:start_ind +
                                                       seg_size])

                    start_time += (1 - seg_overlap) * seg_duration
                    chunk_count += 1
                count += 1

            assert count == n_minibatches
Example #8
    def test_execute_typeerror(self):
        with pytest.raises(TypeError):
            en_ext = EnergyExtractor()
            SegmentFeatureProcessor([en_ext])
Example #9
    def test_execute(self, ac_ext, segment_container):
        data, sample_rate = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE))
        sf_pro = SegmentFeatureProcessor([ac_ext])
        sf_pro.execute(segment_container)
        assert np.all(segment_container.segments[0].features["audio_chunk"] ==
                      data[int(0.5 * sample_rate):int(0.6 * sample_rate)])
Example #10
    def test_init(self, ac_ext):
        try:
            SegmentFeatureProcessor([ac_ext])
        except Exception as e:
            pytest.fail("Unexpected Error: {}".format(e))
Example #11
    def from_config(cls, config):
        """Creates a dict of minibatch generators from a config dict

        One minibatch generator is created for every set defined in the
        datasplit, if one is specified in the config file. Otherwise, a single
        generator is created.

        Args:
            config (dict): configuration object
        Returns:
            A dict with one item per set defined in the datasplit, such as
            out_dict["<set_name>"] = <minibatch generator for this set>. If no
            datasplit is defined, the set is named "default".
        """

        # data path config
        dp_config = config["data_path_config"]

        # minibatch config
        mb_config = config["minibatch_config"]

        # audio frame config
        af_config = config["audio_frame_config"]

        # segment config
        seg_config = config["segment_config"]

        # Create a label parser.
        # Because CSVFileLabelParser is configured with a file path and
        # CSVSegmentLabelParser with a root path, two different keys are used.
        if "file2label_filename" in dp_config:
            label_parser = label_parsers.CSVFileLabelParser(
                dp_config["file2label_filename"],
                label_file=dp_config["label_file"])
        elif "seg2label_root" in dp_config:
            label_parser = label_parsers.CSVSegmentLabelParser(
                dp_config["seg2label_root"], dp_config["label_file"])
        else:
            label_parser = None

        # get activity detection
        if "activity_detection" in config:
            act_det_config = config["activity_detection"]
            act_det = activity_detection.factory(
                act_det_config["name"],
                audio_frame_config=af_config,
                feature_config=act_det_config.get("config"))
        else:
            act_det = None

        # instantiate all frame feature extractors needed by the activity detection
        act_det_frame_feature_extractors = []
        if act_det:
            for ff_cfg in act_det.frame_feature_config:
                if extractors.is_feature_implemented(ff_cfg["name"]):
                    act_det_frame_feature_extractors.append(
                        extractors.factory(
                            ff_cfg["name"],
                            audio_frame_config=af_config,
                            feature_config=ff_cfg.get("config")))

        # instantiate all frame feature extractors that will feed the minibatch
        mb_frame_feature_extractors = []
        for ff_cfg in config["features"]:
            if (ff_cfg["name"] != "audio_chunk"
                    and extractors.is_feature_implemented(ff_cfg["name"])):
                mb_frame_feature_extractors.append(
                    extractors.factory(ff_cfg["name"],
                                       audio_frame_config=af_config,
                                       feature_config=ff_cfg.get("config")))

        # create a frame feature processor, in charge of computing all short-term features
        ff_pro = FrameFeatureProcessor(
            AudioFrameGen(sample_rate=af_config["sample_rate"],
                          win_size=af_config["win_size"],
                          hop_size=af_config["hop_size"]),
            act_det_frame_feature_extractors + mb_frame_feature_extractors,
            feature_container_root=dp_config.get('features_root'))

        # create the needed segment-based feature extractors:
        # an audio chunk extractor for raw audio, a frame-based feature chunk
        # extractor if the feature is implemented, and a generic chunk extractor otherwise
        sfe_list = []
        for feature_config in config['features']:
            if feature_config['name'] == "audio_chunk":
                sfe_list.append(
                    AudioChunkExtractor(dp_config['audio_root'],
                                        af_config['sample_rate']))
            elif extractors.is_feature_implemented(feature_config['name']):
                if "scaler" in feature_config:
                    scaler = joblib.load(feature_config['scaler'])
                else:
                    scaler = None
                sfe_list.append(
                    FrameFeatureChunkExtractor(feature_config['name'],
                                               scaler=scaler))
            else:
                sfe_list.append(
                    GenericChunkExtractor(feature_config['name'],
                                          feature_config['sample_rate'],
                                          feature_config['size']))

        # create a segment feature processor, in charge of computing all segment-based features
        sf_pro = SegmentFeatureProcessor(
            list(sfe_list + [act_det]) if act_det else sfe_list,
            ff_pro=ff_pro,
            audio_root=dp_config["audio_root"],
            feature_container_root=dp_config.get('features_root'))

        datasplit_path = dp_config.get("datasplit_path")
        sc_gen_dict = {}
        if not datasplit_path:
            # if no datasplit is present in the config file,
            # create one segment container generator
            sc_gen_dict["default"] = SegmentContainerGenerator(
                dp_config["audio_root"],
                sf_pro,
                label_parser=label_parser,
                seg_duration=seg_config["seg_duration"],
                seg_overlap=seg_config["seg_overlap"],
                shuffle_files=mb_config["shuffle_files"])
        else:
            # else create one per set in the datasplit
            datasplit = joblib.load(datasplit_path)
            for set_name, _ in datasplit["sets"].items():
                sc_gen_dict[set_name] = SegmentContainerGenerator(
                    dp_config["audio_root"],
                    sf_pro,
                    label_parser=label_parser,
                    dataset=datasplit["sets"][set_name],
                    seg_duration=seg_config["seg_duration"],
                    seg_overlap=seg_config["seg_overlap"],
                    shuffle_files=mb_config["shuffle_files"])

        # get the shape (feature_size, n_time_bins) of every feature
        # and build the argument passed to the minibatch generators' init
        # (i.e. a dict of dicts {name: {"feature_size": value, "n_time_bins": value}})
        feature_shape_dict = {}
        frame_feature_count = 0  # count frame features only, not audio_chunk
        for feature_config in config['features']:
            if feature_config['name'] == "audio_chunk":
                feature_size = 1
                n_time_bins = int(af_config["sample_rate"] *
                                  seg_config["seg_duration"])
            elif extractors.is_feature_implemented(feature_config['name']):
                feature_size = mb_frame_feature_extractors[
                    frame_feature_count].size
                n_time_bins = int(seg_config["seg_duration"] *
                                  af_config["sample_rate"] /
                                  af_config["hop_size"])
                frame_feature_count += 1
            else:
                feature_size = feature_config["size"]
                n_time_bins = int(seg_config["seg_duration"] *
                                  feature_config["sample_rate"])
            feature_shape_dict[feature_config['name']] = {
                "feature_size": feature_size,
                "n_time_bins": n_time_bins
            }

        mb_gen_dict = {}
        for set_name, sc_gen in sc_gen_dict.items():
            mb_gen_dict[set_name] = MiniBatchGen(
                sc_gen, mb_config["batch_size"], feature_shape_dict,
                mb_config["shuffle_mb_block_size"])

        return mb_gen_dict
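
A minimal usage sketch of from_config, assuming a JSON file that follows the layout read above (the path, set name and feature name are illustrative, not taken from the test data):

import json

with open("config/minibatch_config.json") as config_file:  # hypothetical path
    config = json.load(config_file)

mb_gen_dict = MiniBatchGen.from_config(config)
mb_gen = mb_gen_dict["default"]  # or a datasplit set name such as "train"

minibatches = mb_gen.execute(active_segments_only=True,
                             with_targets=True,
                             with_filenames=False)
for data, targets in minibatches:
    print(data["mel_spectrum"].shape, len(targets))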