def test_execute(self, ac_ext):
    sample_rate = 22050
    seg_duration = 0.1
    seg_overlap = 0.5

    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    sf_pro = SegmentFeatureProcessor([ac_ext])
    sc_gen = SegmentContainerGenerator(TEST_AUDIO_PATH_TUPLE_1[0],
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)
    sc_list = [sc for sc in sc_gen.execute()]

    id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
    id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

    first_seg = sc_list[0].segments[0]
    first_seg_ref = id0132_data[:int(seg_duration * sample_rate)]

    last_seg = sc_list[-1].segments[-1]
    # step forward one hop (seg_duration * seg_overlap) at a time until a full
    # segment no longer fits, then back up one hop to get the start of the last
    # full segment
    start_time = 0.0
    while start_time + seg_duration < len(id1238_data) / sample_rate:
        start_time += seg_duration * seg_overlap
    start_time -= seg_duration * seg_overlap
    last_seg_ref = id1238_data[int(start_time * sample_rate):
                               int((start_time + seg_duration) * sample_rate)]

    assert (len(sc_list) == 4 and
            np.all(first_seg_ref == first_seg.features["audio_chunk"]) and
            np.all(last_seg_ref == last_seg.features["audio_chunk"]))

def test_init(self):
    try:
        parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
        sf_pro = SegmentFeatureProcessor([])
        SegmentContainerGenerator("fake_audio_root", sf_pro, label_parser=parser)
    except Exception as e:
        pytest.fail("Unexpected Error: {}".format(e))

def test_data_multiple_features(self):
    """Compare data from "manual" constructor to config file-based constructor."""
    # construct minibatch generator "manually"
    sample_rate = 22050
    win_size = 256
    hop_size = 128
    energy_threshold = 0.2
    spectral_flatness_threshold = 0.3
    seg_duration = 0.2
    seg_overlap = 0.5
    batch_size = 50
    num_features = 64
    n_time_bins = 34

    af_gen = AudioFrameGen(sample_rate=sample_rate, win_size=win_size, hop_size=hop_size)
    en_ext = EnergyExtractor()
    sf_ext = SpectralFlatnessExtractor()
    mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                   fft_size=win_size,
                                   n_mels=64,
                                   min_freq=0,
                                   max_freq=sample_rate / 2)
    ff_pro = FrameFeatureProcessor(af_gen,
                                   [en_ext, sf_ext, mel_ext],
                                   feature_container_root=FEATURE_ROOT)
    ffc_ext = FrameFeatureChunkExtractor("mel_spectrum")
    act_det = Simple(energy_threshold=energy_threshold,
                     spectral_flatness_threshold=spectral_flatness_threshold)
    sf_pro = SegmentFeatureProcessor([act_det, ffc_ext],
                                     ff_pro=ff_pro,
                                     audio_root=DATA_PATH)
    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    sc_gen = SegmentContainerGenerator(DATA_PATH,
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)
    mb_gen_1 = MiniBatchGen(sc_gen,
                            batch_size,
                            {"mel_spectrum": {"feature_size": num_features,
                                              "n_time_bins": n_time_bins}},
                            0)

    # parse json file
    with open(CONFIG_PATH) as config_file:
        config = json.loads(config_file.read())
    config["features"] = [
        {"name": "audio_chunk", "config": {}},
        {"name": "mel_spectrum",
         "config": {"n_mels": 64, "min_freq": 0, "max_freq": 11025, "log_amp": 1}}
    ]

    # construct minibatch generator from config
    mb_gen_dict = MiniBatchGen.from_config(config)
    mb_gen_2 = mb_gen_dict["default"]

    # execute and compare
    try:
        mb_gen_1_e = mb_gen_1.execute(active_segments_only=True,
                                      with_targets=True,
                                      with_filenames=False)
        mb_gen_2_e = mb_gen_2.execute(active_segments_only=True,
                                      with_targets=True,
                                      with_filenames=False)
        if not os.path.exists(FEATURE_ROOT):
            os.makedirs(FEATURE_ROOT)
        for mb1, mb2 in zip(mb_gen_1_e, mb_gen_2_e):
            assert np.all(mb1[0]["mel_spectrum"] == mb2[0]["mel_spectrum"])
            assert np.all(mb1[1] == mb2[1])
    except Exception as e:
        pytest.fail("Unexpected Error: {}".format(e))
    finally:
        shutil.rmtree(FEATURE_ROOT)

def test_audio_data(self):
    """Compare data from "manual" constructor to config file-based constructor,
    with raw audio feature.
    """
    # construct minibatch generator "manually"
    sample_rate = 22050
    seg_duration = 0.2
    seg_overlap = 0.5
    batch_size = 50
    num_features = 1
    n_time_bins = 4410

    ac_ext = AudioChunkExtractor(DATA_PATH, sample_rate)
    sf_pro = SegmentFeatureProcessor([ac_ext], ff_pro=None, audio_root=DATA_PATH)
    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    sc_gen = SegmentContainerGenerator(DATA_PATH,
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)
    mb_gen_1 = MiniBatchGen(sc_gen,
                            batch_size,
                            {"audio_chunk": {"feature_size": num_features,
                                             "n_time_bins": n_time_bins}},
                            0)

    # parse json file
    with open(CONFIG_PATH) as config_file:
        config = json.loads(config_file.read())
    # replace feature by audio_chunk and remove activity detection
    config["features"] = [{"name": "audio_chunk", "config": {}}]
    config.pop("activity_detection")

    # construct minibatch generator from config
    mb_gen_dict = MiniBatchGen.from_config(config)
    mb_gen_2 = mb_gen_dict["default"]

    # execute and compare
    mb_gen_1_e = mb_gen_1.execute(active_segments_only=False,
                                  with_targets=True,
                                  with_filenames=True)
    mb_gen_2_e = mb_gen_2.execute(active_segments_only=False,
                                  with_targets=True,
                                  with_filenames=True)
    for mb1, mb2 in zip(mb_gen_1_e, mb_gen_2_e):
        assert np.all(mb1[0]["audio_chunk"] == mb2[0]["audio_chunk"])
        assert np.all(mb1[1] == mb2[1])
        assert np.all(mb1[2] == mb2[2])

def test_gen_minibatches_2d_w_pca_scaler(self):
    sample_rate = 22050
    win_size = 256
    hop_size = 128
    energy_threshold = 0.2
    spectral_flatness_threshold = 0.3
    seg_duration = 0.1
    seg_overlap = 0.5
    batch_size = 10
    num_features = 16
    n_time_bins = 17

    af_gen = AudioFrameGen(sample_rate=sample_rate, win_size=win_size, hop_size=hop_size)
    en_ext = EnergyExtractor()
    sf_ext = SpectralFlatnessExtractor()
    mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                   fft_size=win_size,
                                   n_mels=64,
                                   min_freq=0,
                                   max_freq=sample_rate / 2)
    ff_pro = FrameFeatureProcessor(af_gen,
                                   [en_ext, sf_ext, mel_ext],
                                   feature_container_root=FEATURE_ROOT)
    pca = joblib.load(os.path.join(DATA_PATH, "transform/mel64_pca16_norm/pca.jl"))
    scaler = joblib.load(os.path.join(DATA_PATH, "transform/mel64_pca16_norm/scaler.jl"))
    ffc_ext = FrameFeatureChunkExtractor("mel_spectrum", pca, scaler)
    act_det = Simple(energy_threshold=energy_threshold,
                     spectral_flatness_threshold=spectral_flatness_threshold)
    sf_pro = SegmentFeatureProcessor([act_det, ffc_ext],
                                     ff_pro=ff_pro,
                                     audio_root=DATA_PATH)
    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    sc_gen = SegmentContainerGenerator(DATA_PATH,
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)
    sc_gen_e = sc_gen.execute()

    active_segments = []
    # compare data in segment and corresponding data in feature container
    for sc in sc_gen_e:
        fc_path = os.path.join(FEATURE_ROOT, sc.audio_path.replace(".wav", ".fc.jl"))
        fc = feature_container.FeatureContainer.load(fc_path)
        for s in sc.segments:
            if hasattr(s, 'activity') and s.activity:
                start_ind = fc.time_to_frame_ind(s.start_time)
                end_ind = start_ind + n_time_bins
                data = scaler.transform(
                    pca.transform(fc.features["mel_spectrum"]["data"][start_ind:end_ind]))
                assert np.all(data == s.features["mel_spectrum"])
                active_segments.append(s)

    # compare data in segment and corresponding data in minibatches
    mb_gen = MiniBatchGen(sc_gen,
                          batch_size,
                          {"mel_spectrum": {"feature_size": num_features,
                                            "n_time_bins": n_time_bins}},
                          0)
    mb_gen_e = mb_gen.execute(active_segments_only=True,
                              with_targets=False,
                              with_filenames=False)
    count = 0
    # without targets and filenames, every minibatch is yielded as a 1-tuple
    for mb, in mb_gen_e:
        for data in mb["mel_spectrum"]:
            assert np.all(data[0].T == active_segments[count].features["mel_spectrum"])
            count += 1

def test_gen_minibatches_multiple_features_1(self, ac_ext):
    sample_rate = 22050
    win_size = 256
    hop_size = 128
    seg_duration = 0.1
    seg_overlap = 0.5
    seg_size = int(seg_duration * sample_rate)

    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    classes = parser.get_labels()

    n_epochs = 1
    batch_size = 10
    num_features_mel = 64
    n_time_bins_mel = 17
    num_features_audio = 1
    n_time_bins_audio = 2205

    af_gen = AudioFrameGen(sample_rate=sample_rate, win_size=win_size, hop_size=hop_size)
    mel_ext = MelSpectrumExtractor(sample_rate=sample_rate,
                                   fft_size=win_size,
                                   n_mels=num_features_mel,
                                   min_freq=0,
                                   max_freq=sample_rate / 2)
    ff_pro = FrameFeatureProcessor(af_gen, [mel_ext])
    pca = None
    scaler = None
    ffc_ext = FrameFeatureChunkExtractor("mel_spectrum", pca, scaler)
    sf_pro = SegmentFeatureProcessor([ac_ext, ffc_ext],
                                     ff_pro=ff_pro,
                                     audio_root=DATA_PATH)
    sc_gen = SegmentContainerGenerator(DATA_PATH,
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)

    id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
    id0133_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_2))
    id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_3))
    id1322_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

    chunk_size = sample_rate * seg_duration
    # -1 because the number of audio chunks and frame feature chunks differ by 1
    id0132_n_chunks = utils.get_n_overlapping_chunks(
        len(id0132_data), chunk_size, seg_overlap) - 1
    # -1 because the number of audio chunks and frame feature chunks differ by 1
    id0133_n_chunks = utils.get_n_overlapping_chunks(
        len(id0133_data), chunk_size, seg_overlap) - 1
    id1238_n_chunks = utils.get_n_overlapping_chunks(
        len(id1238_data), chunk_size, seg_overlap)
    id1322_n_chunks = utils.get_n_overlapping_chunks(
        len(id1322_data), chunk_size, seg_overlap)
    n_minibatches = (id0132_n_chunks + id0133_n_chunks +
                     id1238_n_chunks + id1322_n_chunks) // batch_size

    mb_gen = MiniBatchGen(sc_gen,
                          batch_size,
                          {"mel_spectrum": {"feature_size": num_features_mel,
                                            "n_time_bins": n_time_bins_mel},
                           "audio_chunk": {"feature_size": num_features_audio,
                                           "n_time_bins": n_time_bins_audio}},
                          0)

    for _ in range(n_epochs):
        mb_gen_e = mb_gen.execute(with_targets=True, with_filenames=True)
        count = 0
        start_time = 0.0
        chunk_count = 0
        is_dataset1, is_dataset2, is_dataset3 = [True, True, True]
        for data, targets, filenames in mb_gen_e:
            for d, t, f in zip(data["audio_chunk"], targets, filenames):
                start_ind = int(start_time * sample_rate)
                if chunk_count < id0132_n_chunks:
                    assert f == "dataset1/ID0132.wav"
                    assert t == 3
                    assert np.all(d == id0132_data[start_ind:start_ind + seg_size])
                elif chunk_count < id0132_n_chunks + id0133_n_chunks:
                    if is_dataset1:
                        is_dataset1 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset1/ID0133.wav"
                    assert t == 3
                    assert np.all(d == id0133_data[start_ind:start_ind + seg_size])
                elif chunk_count < id0132_n_chunks + id0133_n_chunks + id1238_n_chunks:
                    if is_dataset2:
                        is_dataset2 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset2/ID1238.wav"
                    assert t == -3
                    assert np.all(d == id1238_data[start_ind:start_ind + seg_size])
                else:
                    if is_dataset3:
                        is_dataset3 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset2/ID1322.wav"
                    assert t == -3
                    assert np.all(d == id1322_data[start_ind:start_ind + seg_size])
                start_time += (1 - seg_overlap) * seg_duration
                chunk_count += 1
            count += 1
        assert count == n_minibatches

def test_gen_minibatches_1d(self, ac_ext):
    sample_rate = 22050
    seg_duration = 0.1
    seg_overlap = 0.5
    seg_size = int(seg_duration * sample_rate)

    parser = CSVFileLabelParser(TEST_FILE2LABEL_PATH, label_file=TEST_LABEL_PATH)
    classes = parser.get_labels()
    sf_pro = SegmentFeatureProcessor([ac_ext])
    sc_gen = SegmentContainerGenerator(DATA_PATH,
                                       sf_pro,
                                       label_parser=parser,
                                       seg_duration=seg_duration,
                                       seg_overlap=seg_overlap)

    id0132_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_1))
    id0133_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_2))
    id1238_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_3))
    id1322_data, _ = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE_4))

    n_epochs = 3
    batch_size = 10
    n_time_bins = int(seg_duration * sample_rate)

    chunk_size = sample_rate * seg_duration
    id0132_n_chunks = utils.get_n_overlapping_chunks(
        len(id0132_data), chunk_size, seg_overlap)
    id0133_n_chunks = utils.get_n_overlapping_chunks(
        len(id0133_data), chunk_size, seg_overlap)
    id1238_n_chunks = utils.get_n_overlapping_chunks(
        len(id1238_data), chunk_size, seg_overlap)
    id1322_n_chunks = utils.get_n_overlapping_chunks(
        len(id1322_data), chunk_size, seg_overlap)
    n_minibatches = (id0132_n_chunks + id0133_n_chunks +
                     id1238_n_chunks + id1322_n_chunks) // batch_size

    mb_gen = MiniBatchGen(sc_gen,
                          batch_size,
                          {"audio_chunk": {"feature_size": 1,
                                           "n_time_bins": n_time_bins}},
                          0)

    for _ in range(n_epochs):
        mb_gen_e = mb_gen.execute(with_targets=True, with_filenames=True)
        count = 0
        start_time = 0.0
        chunk_count = 0
        is_dataset1, is_dataset2, is_dataset3 = [True, True, True]
        for data, targets, filenames in mb_gen_e:
            for d, t, f in zip(data["audio_chunk"], targets, filenames):
                start_ind = int(start_time * sample_rate)
                if chunk_count < id0132_n_chunks:
                    assert f == "dataset1/ID0132.wav"
                    assert t == 3
                    assert np.all(d == id0132_data[start_ind:start_ind + seg_size])
                elif chunk_count < id0132_n_chunks + id0133_n_chunks:
                    if is_dataset1:
                        is_dataset1 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset1/ID0133.wav"
                    assert t == 3
                    assert np.all(d == id0133_data[start_ind:start_ind + seg_size])
                elif chunk_count < id0132_n_chunks + id0133_n_chunks + id1238_n_chunks:
                    if is_dataset2:
                        is_dataset2 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset2/ID1238.wav"
                    assert t == -3
                    assert np.all(d == id1238_data[start_ind:start_ind + seg_size])
                else:
                    if is_dataset3:
                        is_dataset3 = False
                        start_time = 0.0
                        start_ind = 0
                    assert f == "dataset2/ID1322.wav"
                    assert t == -3
                    assert np.all(d == id1322_data[start_ind:start_ind + seg_size])
                start_time += (1 - seg_overlap) * seg_duration
                chunk_count += 1
            count += 1
        assert count == n_minibatches

def test_execute_typeerror(self):
    with pytest.raises(TypeError):
        en_ext = EnergyExtractor()
        SegmentFeatureProcessor([en_ext])

def test_execute(self, ac_ext, segment_container):
    data, sample_rate = sf.read(os.path.join(*TEST_AUDIO_PATH_TUPLE))
    sf_pro = SegmentFeatureProcessor([ac_ext])
    sf_pro.execute(segment_container)
    assert np.all(segment_container.segments[0].features["audio_chunk"] ==
                  data[int(0.5 * sample_rate):int(0.6 * sample_rate)])

def test_init(self, ac_ext):
    try:
        SegmentFeatureProcessor([ac_ext])
    except Exception as e:
        pytest.fail("Unexpected Error: {}".format(e))

@classmethod
def from_config(cls, config):
    """Create a dict of minibatch generators from a config dict.

    One minibatch generator is created for every set defined in the datasplit,
    if specified in the config file. Otherwise, only one is created.

    Args:
        config (dict): configuration object

    Returns:
        A dict with one item per set defined in the datasplit, such that
        out_dict["<set_name>"] is the minibatch generator for this set.
        If no datasplit is defined, the set is named "default".
    """

    # data path config
    dp_config = config["data_path_config"]
    # minibatch config
    mb_config = config["minibatch_config"]
    # audio frame config
    af_config = config["audio_frame_config"]
    # segment config
    seg_config = config["segment_config"]

    # Create a label parser.
    # Because FileLabelParser is set with a file path and SegmentLabelParser
    # with a root path, two different keys are used.
    if "file2label_filename" in dp_config:
        label_parser = label_parsers.CSVFileLabelParser(
            dp_config["file2label_filename"],
            label_file=dp_config["label_file"])
    elif "seg2label_root" in dp_config:
        label_parser = label_parsers.CSVSegmentLabelParser(
            dp_config["seg2label_root"],
            dp_config["label_file"])
    else:
        label_parser = None

    # get activity detection
    if "activity_detection" in config:
        act_det_config = config["activity_detection"]
        act_det = activity_detection.factory(
            act_det_config["name"],
            audio_frame_config=af_config,
            feature_config=act_det_config.get("config"))
    else:
        act_det = None

    # instantiate all frame feature extractors needed by the activity detection
    act_det_frame_feature_extractors = []
    if act_det:
        for ff_cfg in act_det.frame_feature_config:
            if extractors.is_feature_implemented(ff_cfg["name"]):
                act_det_frame_feature_extractors.append(
                    extractors.factory(ff_cfg["name"],
                                       audio_frame_config=af_config,
                                       feature_config=ff_cfg.get("config")))

    # instantiate all frame feature extractors that will feed the minibatch
    mb_frame_feature_extractors = []
    for ff_cfg in config["features"]:
        if (ff_cfg["name"] != "audio_chunk" and
                extractors.is_feature_implemented(ff_cfg["name"])):
            mb_frame_feature_extractors.append(
                extractors.factory(ff_cfg["name"],
                                   audio_frame_config=af_config,
                                   feature_config=ff_cfg.get("config")))

    # create a frame feature processor, in charge of computing all short-term features
    ff_pro = FrameFeatureProcessor(
        AudioFrameGen(sample_rate=af_config["sample_rate"],
                      win_size=af_config["win_size"],
                      hop_size=af_config["hop_size"]),
        act_det_frame_feature_extractors + mb_frame_feature_extractors,
        feature_container_root=dp_config.get("features_root"))

    # Create the needed segment-based feature extractors: an audio chunk
    # extractor for raw audio, a frame-based feature chunk extractor for every
    # implemented frame feature, and a generic chunk extractor otherwise.
    sfe_list = []
    for feature_config in config["features"]:
        if feature_config["name"] == "audio_chunk":
            sfe_list.append(
                AudioChunkExtractor(dp_config["audio_root"],
                                    af_config["sample_rate"]))
        elif extractors.is_feature_implemented(feature_config["name"]):
            if "scaler" in feature_config:
                scaler = joblib.load(feature_config["scaler"])
            else:
                scaler = None
            sfe_list.append(
                FrameFeatureChunkExtractor(feature_config["name"], scaler=scaler))
        else:
            sfe_list.append(
                GenericChunkExtractor(feature_config["name"],
                                      feature_config["sample_rate"],
                                      feature_config["size"]))

    # create a segment feature processor, in charge of computing all segment-based features
    sf_pro = SegmentFeatureProcessor(
        list(sfe_list + [act_det]) if act_det else sfe_list,
        ff_pro=ff_pro,
        audio_root=dp_config["audio_root"],
        feature_container_root=dp_config.get("features_root"))

    datasplit_path = dp_config.get("datasplit_path")
    sc_gen_dict = {}
    if not datasplit_path:
        # if no datasplit is present in the config file,
        # create a single segment container generator
        sc_gen_dict["default"] = SegmentContainerGenerator(
            dp_config["audio_root"],
            sf_pro,
            label_parser=label_parser,
            seg_duration=seg_config["seg_duration"],
            seg_overlap=seg_config["seg_overlap"],
            shuffle_files=mb_config["shuffle_files"])
    else:
        # otherwise create one per set in the datasplit
        datasplit = joblib.load(datasplit_path)
        for set_name, _ in datasplit["sets"].items():
            sc_gen_dict[set_name] = SegmentContainerGenerator(
                dp_config["audio_root"],
                sf_pro,
                label_parser=label_parser,
                dataset=datasplit["sets"][set_name],
                seg_duration=seg_config["seg_duration"],
                seg_overlap=seg_config["seg_overlap"],
                shuffle_files=mb_config["shuffle_files"])

    # Get the shape (feature_size, n_time_bins) of every feature and build the
    # argument to be passed to the minibatch generator init, i.e. a dict of
    # dicts {name: {"feature_size": <value>, "n_time_bins": <value>}}.
    feature_shape_dict = {}
    frame_feature_count = 0  # count frame features only, not audio_chunk
    for feature_config in config["features"]:
        if feature_config["name"] == "audio_chunk":
            feature_size = 1
            n_time_bins = int(af_config["sample_rate"] * seg_config["seg_duration"])
        elif extractors.is_feature_implemented(feature_config["name"]):
            feature_size = mb_frame_feature_extractors[frame_feature_count].size
            n_time_bins = int(seg_config["seg_duration"] *
                              af_config["sample_rate"] / af_config["hop_size"])
            frame_feature_count += 1
        else:
            feature_size = feature_config["size"]
            n_time_bins = int(seg_config["seg_duration"] *
                              feature_config["sample_rate"])
        feature_shape_dict[feature_config["name"]] = {
            "feature_size": feature_size,
            "n_time_bins": n_time_bins
        }

    mb_gen_dict = {}
    for set_name, sc_gen in sc_gen_dict.items():
        mb_gen_dict[set_name] = MiniBatchGen(sc_gen,
                                             mb_config["batch_size"],
                                             feature_shape_dict,
                                             mb_config["shuffle_mb_block_size"])

    return mb_gen_dict
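
# Usage sketch (illustrative only, not part of the library): a minimal config of
# the shape read by from_config above. The key names come from the lookups in
# this method, and the numeric values mirror those used in the tests; the paths,
# the detector name, and the optional entries are placeholders.
#
#     config = {
#         "data_path_config": {
#             "audio_root": "<audio root dir>",
#             "file2label_filename": "<file2label.csv>",
#             "label_file": "<labels.csv>",
#             "features_root": "<feature container dir>",  # optional
#             # "datasplit_path": "<datasplit.jl>",        # optional
#         },
#         "audio_frame_config": {"sample_rate": 22050, "win_size": 256, "hop_size": 128},
#         "segment_config": {"seg_duration": 0.2, "seg_overlap": 0.5},
#         "minibatch_config": {"batch_size": 50,
#                              "shuffle_files": False,
#                              "shuffle_mb_block_size": 0},
#         "features": [{"name": "audio_chunk", "config": {}}],
#         # "activity_detection": {"name": "<detector>", "config": {...}},  # optional
#     }
#     mb_gen = MiniBatchGen.from_config(config)["default"]
#     minibatches = mb_gen.execute(with_targets=True, with_filenames=True)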