Ejemplo n.º 1
0
    def _create_subviews(path, corpus):
        """
        Load the subviews based on testing_list.txt and validation_list.txt.

        Every utterance listed in one of the two files is assigned to the
        'test' or 'dev' subview respectively; everything else becomes 'train'.

        Args:
            path (str): Root folder containing testing_list.txt and
                        validation_list.txt.
            corpus: Corpus to register the 'train', 'dev' and 'test'
                    subviews on.
        """
        test_list_path = os.path.join(path, 'testing_list.txt')
        dev_list_path = os.path.join(path, 'validation_list.txt')

        test_list = textfile.read_separated_lines(test_list_path,
                                                  separator='/',
                                                  max_columns=2)
        dev_list = textfile.read_separated_lines(dev_list_path,
                                                 separator='/',
                                                 max_columns=2)

        # Entries are (label, filename) pairs; utterance-ids are built as
        # '<filename-without-ext>_<label>'. Set comprehensions instead of
        # set([listcomp]) avoid building an intermediate list.
        test_set = {
            '{}_{}'.format(os.path.splitext(x[1])[0], x[0]) for x in test_list
        }
        dev_set = {
            '{}_{}'.format(os.path.splitext(x[1])[0], x[0]) for x in dev_list
        }
        # Train = everything NOT in dev or test, hence the inverse filter.
        inv_train_set = test_set.union(dev_set)

        train_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs=inv_train_set, inverse=True)
        train_view = subview.Subview(corpus, filter_criteria=train_filter)
        corpus.import_subview('train', train_view)

        dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=dev_set,
                                                        inverse=False)
        dev_view = subview.Subview(corpus, filter_criteria=dev_filter)
        corpus.import_subview('dev', dev_view)

        test_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs=test_set, inverse=False)
        test_view = subview.Subview(corpus, filter_criteria=test_filter)
        corpus.import_subview('test', test_view)
Ejemplo n.º 2
0
def create_sample_dataset(temp_dir):
    """
    Build a small corpus fixture with three tracks (two wav, one flac),
    three issuers, four transcribed utterances and train/dev subviews.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    path_wav_1 = resources.sample_wav_file('wav_1.wav')
    path_wav_2 = resources.sample_wav_file('wav_2.wav')
    path_flac = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    track_1 = corpus.new_file(path_wav_1, track_idx='wav_1')
    track_2 = corpus.new_file(path_wav_2, track_idx='wav_2')
    track_3 = corpus.new_file(path_flac, track_idx='wav_3')

    spk_1 = Speaker('spk-1', gender=Gender.MALE)
    spk_2 = Speaker('spk-2', gender=Gender.FEMALE)
    spk_3 = Issuer('spk-3')

    corpus.import_issuers([spk_1, spk_2, spk_3])

    # Full-file utterance; duration of wav_1 is 2.5951875s.
    utt_1 = corpus.new_utterance('utt-1', track_1.idx, issuer_idx=spk_1.idx)
    # wav_2 is split into two segments.
    utt_2 = corpus.new_utterance('utt-2',
                                 track_2.idx,
                                 issuer_idx=spk_2.idx,
                                 start=0,
                                 end=1.5)
    utt_3 = corpus.new_utterance('utt-3',
                                 track_2.idx,
                                 issuer_idx=spk_2.idx,
                                 start=1.5,
                                 end=2.5)
    # Full-file utterance; duration of the flac track is 5.0416875s.
    utt_4 = corpus.new_utterance('utt-4', track_3.idx, issuer_idx=spk_3.idx)

    transcripts = [
        (utt_1, 'who am i'),
        (utt_2, 'who are you'),
        (utt_3, 'who is he'),
        (utt_4, 'who are they'),
    ]
    for utt, text in transcripts:
        utt.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'})

    corpus.import_subview(
        'train', subview.Subview(corpus, filter_criteria=[train_filter]))
    corpus.import_subview(
        'dev', subview.Subview(corpus, filter_criteria=[dev_filter]))

    return corpus
Ejemplo n.º 3
0
    def _load(self, path):
        """
        Build a corpus from ``path``, loading every known subset folder.

        For each subset a same-named subview is registered; additionally
        wav-type subviews are created per subset (prefixed with the subset
        name) and once over the full corpus.
        """
        corpus = audiomate.Corpus(path=path)

        for subset_name in SUBSETS:
            subset_path = os.path.join(path, subset_name)

            # Collect the utterance-ids produced by every loaded file.
            utt_ids = []
            for file_idx in self.get_ids_from_folder(subset_path):
                utt_ids.extend(self.load_file(subset_path, file_idx, corpus))

            matching = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=utt_ids)
            corpus.import_subview(
                subset_name,
                subview.Subview(corpus, filter_criteria=[matching]))

            TudaReader.create_wav_type_subviews(
                corpus, utt_ids, prefix='{}_'.format(subset_name))

        # Wav-type subviews over all subsets combined (no prefix).
        TudaReader.create_wav_type_subviews(corpus, corpus.utterances.keys())

        return corpus
Ejemplo n.º 4
0
    def test_match(self):
        """Filter matches exactly the whitelisted utterance-ids."""
        # Renamed from ``filter`` to avoid shadowing the builtin.
        utt_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'a', 'b', 'd'})

        self.assertTrue(utt_filter.match(assets.Utterance('a', 'x'), None))
        self.assertTrue(utt_filter.match(assets.Utterance('b', 'x'), None))
        self.assertTrue(utt_filter.match(assets.Utterance('d', 'x'), None))
        self.assertFalse(utt_filter.match(assets.Utterance('c', 'x'), None))
        self.assertFalse(utt_filter.match(assets.Utterance('e', 'x'), None))
Ejemplo n.º 5
0
    def test_match_inverse(self):
        """Inverse filter matches everything EXCEPT the listed ids."""
        # Renamed from ``filter`` to avoid shadowing the builtin.
        utt_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'a', 'b', 'd'}, inverse=True)

        assert not utt_filter.match(tracks.Utterance('a', 'x'), None)
        assert not utt_filter.match(tracks.Utterance('b', 'x'), None)
        assert not utt_filter.match(tracks.Utterance('d', 'x'), None)
        assert utt_filter.match(tracks.Utterance('c', 'x'), None)
        assert utt_filter.match(tracks.Utterance('e', 'x'), None)
Ejemplo n.º 6
0
    def test_match(self):
        """Filter matches exactly the whitelisted utterance-ids."""
        utt_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'a', 'b', 'd'})

        for included in ('a', 'b', 'd'):
            assert utt_filter.match(tracks.Utterance(included, 'x'), None)

        for excluded in ('c', 'e'):
            assert not utt_filter.match(tracks.Utterance(excluded, 'x'), None)
Ejemplo n.º 7
0
    def create_wav_type_subviews(corpus, utt_ids, prefix=''):
        """
        Group utterance-ids by their wav-type suffix (text after the last
        underscore) and register one subview per group on ``corpus``,
        named ``<prefix><wav-type>``.
        """
        by_type = collections.defaultdict(list)
        for utt_id in utt_ids:
            by_type[utt_id.split('_')[-1]].append(utt_id)

        for wav_type, members in by_type.items():
            member_filter = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=members)
            view = subview.Subview(corpus, filter_criteria=[member_filter])
            corpus.import_subview('{}{}'.format(prefix, wav_type), view)
Ejemplo n.º 8
0
    def test_import_subview(self):
        """An imported subview is registered and bound to the corpus."""
        criteria = [
            subview.MatchingUtteranceIdxFilter(utterance_idxs={'existing_utt'})
        ]
        train_set = subview.Subview(None, filter_criteria=criteria)

        self.corpus.import_subview('train', train_set)

        assert self.corpus.num_subviews == 1
        assert self.corpus.subviews['train'] == train_set
        # import_subview re-binds the view to the importing corpus.
        assert self.corpus.subviews['train'].corpus == self.corpus
Ejemplo n.º 9
0
    def _subviews_from_utterance_splits(self, splits):
        """
        Create subviews from a dict containing utterance-ids for each subview.

        e.g. {'train': ['utt-1', 'utt-2'], 'test': [...], ...}

        Args:
            splits (dict): Mapping of subview name to a list of
                           utterance-ids.

        Returns:
            dict: Mapping of subview name to the created Subview.
        """
        subviews = {}

        for idx, subview_utterances in splits.items():
            # Renamed from ``filter`` to avoid shadowing the builtin.
            idx_filter = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=subview_utterances)
            subviews[idx] = subview.Subview(self.corpus,
                                            filter_criteria=idx_filter)

        return subviews
Ejemplo n.º 10
0
    def _load(self, path):
        """
        Build a corpus from ``path`` and register one subview per subset
        folder, keyed by the subset name.
        """
        corpus = audiomate.Corpus(path=path)

        for subset_name in SUBSETS:
            subset_path = os.path.join(path, subset_name)
            ids = TudaReader.get_ids_from_folder(subset_path, subset_name)

            for file_idx in ids:
                TudaReader.load_file(subset_path, file_idx, corpus)

            matching = subview.MatchingUtteranceIdxFilter(utterance_idxs=ids)
            corpus.import_subview(
                subset_name,
                subview.Subview(corpus, filter_criteria=[matching]))

        return corpus
Ejemplo n.º 11
0
def create_multi_label_corpus():
    """
    Build an in-memory corpus fixture: four wav tracks, three issuers,
    eight utterances with music/speech label lists, train/dev subviews
    and two dummy feature containers.
    """
    corpus = audiomate.Corpus()

    # NOTE: the first track deliberately uses a dash ('wav-1') while the
    # others use underscores — kept as-is for fixture compatibility.
    track_1 = corpus.new_file(sample_wav_file('wav_1.wav'), track_idx='wav-1')
    track_2 = corpus.new_file(sample_wav_file('wav_2.wav'), track_idx='wav_2')
    track_3 = corpus.new_file(sample_wav_file('wav_3.wav'), track_idx='wav_3')
    track_4 = corpus.new_file(sample_wav_file('wav_4.wav'), track_idx='wav_4')

    spk_1 = corpus.new_issuer('spk-1')
    spk_2 = corpus.new_issuer('spk-2')
    spk_3 = corpus.new_issuer('spk-3')

    # Two full-file utterances, then three segments on each of the
    # remaining tracks.
    utt_1 = corpus.new_utterance('utt-1', track_1.idx, issuer_idx=spk_1.idx)
    utt_2 = corpus.new_utterance('utt-2', track_2.idx, issuer_idx=spk_1.idx)

    segments = [(0, 15), (15, 25), (25, 40)]
    utt_3, utt_4, utt_5 = (
        corpus.new_utterance('utt-{}'.format(num + 3), track_3.idx,
                             issuer_idx=spk_2.idx, start=begin, end=stop)
        for num, (begin, stop) in enumerate(segments))
    utt_6, utt_7, utt_8 = (
        corpus.new_utterance('utt-{}'.format(num + 6), track_4.idx,
                             issuer_idx=spk_3.idx, start=begin, end=stop)
        for num, (begin, stop) in enumerate(segments))

    # Shared music/speech/music pattern used by several utterances.
    music_speech_music = [('music', 0, 5), ('speech', 5, 12),
                          ('music', 13, 15)]

    annotations = [
        (utt_1, music_speech_music),
        (utt_2, music_speech_music),
        (utt_3, [('music', 0, 1), ('speech', 2, 6)]),
        (utt_4, music_speech_music),
        (utt_5, [('speech', 0, 7)]),
        (utt_6, music_speech_music),
        (utt_7, [('music', 0, 5), ('speech', 5, 11)]),
        (utt_8, [('music', 0, 10)]),
    ]

    for utt, spec in annotations:
        utt.set_label_list(
            LabelList(labels=[Label(value, begin, stop)
                              for value, begin, stop in spec]))

    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-4', 'utt-5', 'utt-6'})
    dev_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-7', 'utt-8'})

    corpus.import_subview(
        'train', subview.Subview(corpus, filter_criteria=[train_filter]))
    corpus.import_subview(
        'dev', subview.Subview(corpus, filter_criteria=[dev_filter]))

    corpus.new_feature_container('mfcc', '/some/dummy/path/secondmfcc')
    corpus.new_feature_container('energy', '/some/dummy/path/energy')

    return corpus
Ejemplo n.º 12
0
def create_dataset():
    """
    Build a corpus fixture in a fresh temp dir: four wav tracks, three
    issuers, five transcribed utterances, train/dev subviews and two
    dummy feature containers.
    """
    corpus = audiomate.Corpus(tempfile.mkdtemp())

    # NOTE: the first track deliberately uses a dash ('wav-1') while the
    # others use underscores — kept as-is for fixture compatibility.
    track_1 = corpus.new_file(sample_wav_file('wav_1.wav'), track_idx='wav-1')
    track_2 = corpus.new_file(sample_wav_file('wav_2.wav'), track_idx='wav_2')
    track_3 = corpus.new_file(sample_wav_file('wav_3.wav'), track_idx='wav_3')
    track_4 = corpus.new_file(sample_wav_file('wav_4.wav'), track_idx='wav_4')

    spk_1 = Speaker('spk-1', gender=Gender.MALE)
    spk_2 = Speaker('spk-2', gender=Gender.FEMALE)
    spk_3 = Issuer('spk-3')

    corpus.import_issuers([spk_1, spk_2, spk_3])

    # utt-1/utt-2 cover full files (wav_1 lasts 2.5951875s);
    # utt-3/utt-4 split track_3 into two segments.
    utt_1 = corpus.new_utterance('utt-1', track_1.idx, issuer_idx=spk_1.idx)
    utt_2 = corpus.new_utterance('utt-2', track_2.idx, issuer_idx=spk_1.idx)
    utt_3 = corpus.new_utterance('utt-3',
                                 track_3.idx,
                                 issuer_idx=spk_2.idx,
                                 start=0,
                                 end=1.5)
    utt_4 = corpus.new_utterance('utt-4',
                                 track_3.idx,
                                 issuer_idx=spk_2.idx,
                                 start=1.5,
                                 end=2.5)
    utt_5 = corpus.new_utterance('utt-5', track_4.idx, issuer_idx=spk_3.idx)

    plain_transcripts = [
        (utt_1, 'who am i'),
        (utt_3, 'who is he'),
        (utt_4, 'who are they'),
        (utt_5, 'who is she'),
    ]
    for utt, text in plain_transcripts:
        utt.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    # utt-2 carries extra label metadata, so it is set separately.
    utt_2.set_label_list(
        LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                  labels=[Label('who are you', meta={
                      'a': 'hey',
                      'b': 2
                  })]))

    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    dev_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-4', 'utt-5'})

    corpus.import_subview(
        'train', subview.Subview(corpus, filter_criteria=[train_filter]))
    corpus.import_subview(
        'dev', subview.Subview(corpus, filter_criteria=[dev_filter]))

    corpus.new_feature_container('mfcc', '/some/dummy/path')
    corpus.new_feature_container('mel', '/some/dummy/path_mel')

    return corpus
Ejemplo n.º 13
0
 def test_serialize_inverse(self):
     """An inverse filter serializes with the 'exclude' marker."""
     inv_filter = subview.MatchingUtteranceIdxFilter(
         utterance_idxs={'a', 'b', 'd'}, inverse=True)

     assert inv_filter.serialize() == 'exclude,a,b,d'
Ejemplo n.º 14
0
 def test_serialize(self):
     """A non-inverse filter serializes with the 'include' marker."""
     inc_filter = subview.MatchingUtteranceIdxFilter(
         utterance_idxs={'a', 'b', 'd'})

     assert inc_filter.serialize() == 'include,a,b,d'
Ejemplo n.º 15
0
 def setUp(self):
     """Create the sample corpus and a subview over utt-1/utt-3."""
     # Renamed from ``filter`` to avoid shadowing the builtin.
     utt_filter = subview.MatchingUtteranceIdxFilter(
         utterance_idxs={'utt-1', 'utt-3'})
     self.corpus = resources.create_dataset()
     self.subview = subview.Subview(self.corpus,
                                    filter_criteria=[utt_filter])
Ejemplo n.º 16
0
def sample_subview():
    """Return a Subview over utterances 'utt-1'/'utt-3' of the sample corpus."""
    # Renamed from ``filter`` to avoid shadowing the builtin.
    utt_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-1', 'utt-3'})
    corpus = resources.create_dataset()
    return subview.Subview(corpus, filter_criteria=[utt_filter])