예제 #1
0
    def read_issuers(file_path, corpus):
        """
        Read issuers from a json file and import them into the corpus.

        The file content is iterated as a mapping of issuer id to a dict.
        The ``type`` entry of that dict selects which issuer class is
        created: ``speaker`` creates a ``Speaker``, ``artist`` creates an
        ``Artist``, and anything else (including a missing type) creates a
        plain ``Issuer``.

        Parameters:
            file_path (str): Path of the json file. If it is not an existing
                             file, nothing is read and nothing is imported.
            corpus: Object the issuers are added to via ``import_issuers``.
        """
        if not os.path.isfile(file_path):
            return

        entries = jsonfile.read_json_file(file_path)

        for idx, entry in entries.items():
            kind = entry.get('type', None)
            info = entry.get('info', {})

            if kind == 'speaker':
                # Missing gender / age group fall back to 'unknown';
                # values are lower-cased before the enum lookup.
                issuer = assets.Speaker(
                    idx,
                    gender=assets.Gender(
                        entry.get('gender', 'unknown').lower()),
                    age_group=assets.AgeGroup(
                        entry.get('age_group', 'unknown').lower()),
                    native_language=entry.get('native_language', None),
                    info=info)
            elif kind == 'artist':
                issuer = assets.Artist(idx,
                                       name=entry.get('name', None),
                                       info=info)
            else:
                issuer = assets.Issuer(idx, info=info)

            # Each issuer is imported on its own, right after creation.
            corpus.import_issuers(issuer)
예제 #2
0
    def test_read_samples(self):
        """
        Check that ``Label.read_samples`` returns the expected audio range.

        The utterance starts at 1.0 in the file, so the expected offsets
        below are the label starts shifted by that amount, and the durations
        are the label lengths.
        """
        audio = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
        speaker = assets.Issuer('toni')
        utterance = assets.Utterance('test',
                                     audio,
                                     issuer=speaker,
                                     start=1.0,
                                     end=2.30)

        first = assets.Label('a', 0.15, 0.448)
        second = assets.Label('a', 0.5, 0.73)
        utterance.set_label_list(assets.LabelList(labels=[first, second]))

        # (label, offset within the file, duration)
        cases = [
            (first, 1.15, 0.298),
            (second, 1.5, 0.23),
        ]

        for label, offset, duration in cases:
            expected, __ = librosa.core.load(audio.path,
                                             sr=None,
                                             offset=offset,
                                             duration=duration)
            assert np.array_equal(label.read_samples(), expected)
예제 #3
0
    def test_import_utterance_no_issuer(self):
        """
        Importing an utterance that references the freshly created issuer
        'notexist' is expected to raise ``ValueError``.
        """
        foreign_issuer = assets.Issuer('notexist')
        utterance = assets.Utterance('a', self.ex_file, foreign_issuer, 0, 10)

        with pytest.raises(ValueError):
            self.corpus.import_utterances([utterance])
예제 #4
0
    def test_import_issuers(self):
        """
        Import three issuers, one of which collides with an id already in
        the corpus, and check the resulting ids and the returned mapping.
        """
        importing_issuers = [
            assets.Issuer(idx) for idx in ('a', 'b', 'existing_issuer')
        ]

        mapping = self.corpus.import_issuers(importing_issuers)

        assert self.corpus.num_issuers == 4
        for idx in ('a', 'b', 'existing_issuer_1'):
            assert idx in self.corpus.issuers.keys()

        # Original id -> id the issuer ended up with after the import.
        expected_ids = {
            'a': 'a',
            'b': 'b',
            'existing_issuer': 'existing_issuer_1',
        }

        assert len(mapping) == 3
        for original_idx, imported_idx in expected_ids.items():
            assert mapping[original_idx].idx == imported_idx
예제 #5
0
    def setUp(self):
        """
        Create a corpus in a new temporary directory and register one file,
        one issuer and one utterance directly in its containers.
        """
        tempdir = tempfile.mkdtemp()
        self.tempdir = tempdir
        self.corpus = audiomate.Corpus(tempdir)

        ex_file = assets.File('existing_file', '../any/path.wav')
        ex_issuer = assets.Issuer('existing_issuer')
        ex_utterance = assets.Utterance('existing_utt',
                                        ex_file,
                                        issuer=ex_issuer)

        self.ex_file = ex_file
        self.ex_issuer = ex_issuer
        self.ex_utterance = ex_utterance

        # Entries are written straight into the corpus containers.
        self.corpus.files['existing_file'] = ex_file
        self.corpus.issuers['existing_issuer'] = ex_issuer
        self.corpus.utterances['existing_utt'] = ex_utterance
예제 #6
0
    def setUp(self):
        """
        Build four label lists and an utterance that holds all of them.

        Two of the label lists (``ll_duplicate_idx`` and ``ll_3``) share the
        idx 'charlie'; both are passed to the utterance, in that order.
        """

        def build_label_list(idx, entries):
            # Each entry is a (value, start, end) tuple.
            return assets.LabelList(
                idx=idx, labels=[assets.Label(*entry) for entry in entries])

        self.ll_1 = build_label_list('alpha', [
            ('a', 3.2, 4.5),
            ('b', 5.1, 8.9),
            ('c', 7.2, 10.5),
            ('d', 10.5, 14),
            ('d', 15, 18),
        ])

        self.ll_2 = build_label_list('bravo', [
            ('a', 1.0, 4.2),
            ('e', 4.2, 7.9),
            ('c', 7.2, 10.5),
            ('f', 10.5, 14),
            ('d', 15, 17.3),
        ])

        self.ll_duplicate_idx = build_label_list('charlie', [
            ('t', 1.0, 4.2),
            ('h', 4.2, 7.9),
        ])

        self.ll_3 = build_label_list('charlie', [
            ('a', 1.0, 4.2),
            ('g', 4.2, 7.9),
        ])

        self.file = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
        self.issuer = assets.Issuer('toni')

        all_label_lists = [
            self.ll_1,
            self.ll_2,
            self.ll_duplicate_idx,
            self.ll_3,
        ]
        self.utt = assets.Utterance('test',
                                    self.file,
                                    issuer=self.issuer,
                                    start=1.25,
                                    end=1.30,
                                    label_lists=all_label_lists)
예제 #7
0
    def new_issuer(self, issuer_idx, info=None):
        """
        Create a plain issuer and register it in this dataset.

        Parameters:
            issuer_idx (str): The id to associate the issuer with. If an
                              issuer with this id is already registered, the
                              id is replaced by the result of
                              ``naming.index_name_if_in_list``.
            info (dict, list): Additional info of the issuer.

        Returns:
            Issuer: The newly added issuer.

        Note:
            There is no special handling for ``issuer_idx=None``; the value
            is used as the id as-is.
        """
        taken_idxs = self._issuers.keys()

        # Only touch the id when it collides with a registered issuer.
        if issuer_idx in taken_idxs:
            final_idx = naming.index_name_if_in_list(issuer_idx,
                                                     self._issuers.keys())
        else:
            final_idx = issuer_idx

        issuer = assets.Issuer(final_idx, info=info)
        self._issuers[final_idx] = issuer

        return issuer
예제 #8
0
def create_dataset():
    """
    Build a small corpus in a new temporary directory.

    The corpus contains four files, three issuers (two speakers and one
    plain issuer), five utterances with one word-transcript label each, the
    subviews 'train' and 'dev', and the feature containers 'mfcc' and 'mel'.

    Returns:
        The populated corpus.
    """
    ds = audiomate.Corpus(tempfile.mkdtemp())

    # Resolve all sample paths first, then register the files in order.
    wav_names = ('wav_1.wav', 'wav_2.wav', 'wav_3.wav', 'wav_4.wav')
    file_idxs = ('wav-1', 'wav_2', 'wav_3', 'wav_4')
    wav_paths = [sample_wav_file(wav_name) for wav_name in wav_names]
    file_1, file_2, file_3, file_4 = [
        ds.new_file(wav_path, file_idx=file_idx)
        for wav_path, file_idx in zip(wav_paths, file_idxs)
    ]

    issuer_1 = assets.Speaker('spk-1', gender=assets.Gender.MALE)
    issuer_2 = assets.Speaker('spk-2', gender=assets.Gender.FEMALE)
    issuer_3 = assets.Issuer('spk-3')
    ds.import_issuers([issuer_1, issuer_2, issuer_3])

    utt_1 = ds.new_utterance('utt-1', file_1.idx, issuer_idx=issuer_1.idx)
    utt_2 = ds.new_utterance('utt-2', file_2.idx, issuer_idx=issuer_1.idx)
    # utt-3 and utt-4 are two consecutive segments of the same file.
    utt_3 = ds.new_utterance('utt-3',
                             file_3.idx,
                             issuer_idx=issuer_2.idx,
                             start=0,
                             end=1.5)
    utt_4 = ds.new_utterance('utt-4',
                             file_3.idx,
                             issuer_idx=issuer_2.idx,
                             start=1.5,
                             end=2.5)
    utt_5 = ds.new_utterance('utt-5', file_4.idx, issuer_idx=issuer_3.idx)

    # (utterance, transcript, extra keyword arguments for the label)
    transcripts = (
        (utt_1, 'who am i', {}),
        (utt_2, 'who are you', {'meta': {'a': 'hey', 'b': 2}}),
        (utt_3, 'who is he', {}),
        (utt_4, 'who are they', {}),
        (utt_5, 'who is she', {}),
    )
    for utterance, text, label_kwargs in transcripts:
        label = assets.Label(text, **label_kwargs)
        utterance.set_label_list(
            assets.LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                             labels=[label]))

    # Create every subview before importing any of them.
    splits = (
        ('train', {'utt-1', 'utt-2', 'utt-3'}),
        ('dev', {'utt-4', 'utt-5'}),
    )
    views = []
    for split_name, utterance_idxs in splits:
        idx_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs=utterance_idxs)
        views.append(
            (split_name, subview.Subview(ds, filter_criteria=[idx_filter])))

    for split_name, view in views:
        ds.import_subview(split_name, view)

    for container_idx, container_path in (('mfcc', '/some/dummy/path'),
                                          ('mel', '/some/dummy/path_mel')):
        ds.new_feature_container(container_idx, container_path)

    return ds