Ejemplo n.º 1
0
    def read_issuers(file_path, corpus):
        """
        Read issuer definitions from a JSON file and import them into a corpus.

        If ``file_path`` does not point to an existing file, nothing is done.

        Parameters:
            file_path (str): Path of the JSON file; its top-level keys are
                             used as issuer ids.
            corpus: Object receiving every created issuer through
                    ``import_issuers``.
        """
        if not os.path.isfile(file_path):
            return

        for idx, entry in jsonfile.read_json_file(file_path).items():
            kind = entry.get('type', None)
            extra_info = entry.get('info', {})

            if kind == 'speaker':
                # Missing gender / age group entries fall back to 'unknown'.
                issuer = issuers.Speaker(
                    idx,
                    gender=issuers.Gender(
                        entry.get('gender', 'unknown').lower()),
                    age_group=issuers.AgeGroup(
                        entry.get('age_group', 'unknown').lower()),
                    native_language=entry.get('native_language', None),
                    info=extra_info)
            elif kind == 'artist':
                issuer = issuers.Artist(idx,
                                        name=entry.get('name', None),
                                        info=extra_info)
            else:
                # Any other (or missing) type yields a plain issuer.
                issuer = issuers.Issuer(idx, info=extra_info)

            corpus.import_issuers(issuer)
Ejemplo n.º 2
0
def generate_issuers(n, rand=None):
    """
    Create ``n`` issuers of randomly chosen kinds.

    Parameters:
        n (int): Number of issuers to create.
        rand (random.Random): Random generator used to pick the kind of each
                              issuer. A new ``random.Random()`` is used if
                              None.

    Returns:
        list: The created issuers, with ids ``issuer-0`` to ``issuer-(n-1)``.
    """
    rng = random.Random() if rand is None else rand

    def make_issuer(position):
        # One randint draw per issuer decides which kind is built.
        idx = 'issuer-{}'.format(position)
        kind = rng.randint(1, 3)

        if kind == 1:
            return issuers.Speaker(
                idx,
                gender=issuers.Gender.UNKNOWN,
                age_group=issuers.AgeGroup.CHILD,
                native_language='de'
            )

        if kind == 2:
            return issuers.Artist(idx, 'badam')

        return issuers.Issuer(idx)

    return [make_issuer(position) for position in range(n)]
Ejemplo n.º 3
0
    def test_read_samples(self):
        """
        Compare ``Label.read_samples()`` with the samples librosa loads
        directly from the same wav file for the matching time range.
        """
        wav_path = resources.sample_wav_file('wav_1.wav')
        utterance = tracks.Utterance('t',
                                     tracks.FileTrack('wav', wav_path),
                                     issuer=issuers.Issuer('toni'),
                                     start=1.0,
                                     end=2.30)

        first = annotations.Label('a', 0.15, 0.448)
        second = annotations.Label('a', 0.5, 0.73)
        utterance.set_label_list(
            annotations.LabelList(labels=[first, second]))

        # Expected offset = utterance start (1.0) + label start (0.15),
        # expected duration = label end - label start (0.448 - 0.15).
        reference, _ = librosa.core.load(wav_path,
                                         sr=None,
                                         offset=1.15,
                                         duration=0.298)
        assert np.array_equal(first.read_samples(), reference)

        # Same scheme for the second label: 1.0 + 0.5 up to 1.0 + 0.73.
        reference, _ = librosa.core.load(wav_path,
                                         sr=None,
                                         offset=1.5,
                                         duration=1.73 - 1.5)

        print(reference.shape)
        print(second.read_samples().shape)
        assert np.array_equal(second.read_samples(), reference)
Ejemplo n.º 4
0
    def test_import_utterance_no_issuer(self, corpus):
        """
        Importing an utterance whose issuer is a fresh ``Issuer('notexist')``
        has to raise a ``ValueError``.
        """
        # Presumably rejected because this issuer was never added to the
        # corpus -- confirm against the ``corpus`` fixture in use.
        track = corpus.tracks['existing_file']
        unknown_issuer = issuers.Issuer('notexist')
        utterance = tracks.Utterance('a', track, unknown_issuer, 0, 10)

        with pytest.raises(ValueError):
            corpus.import_utterances([utterance])
Ejemplo n.º 5
0
    def test_import_issuers(self, corpus):
        """
        Import three issuers, one of them with the idx ``existing_issuer``,
        and check the resulting corpus content and the returned mapping.
        """
        source_ids = ('a', 'b', 'existing_issuer')
        mapping = corpus.import_issuers(
            [issuers.Issuer(idx) for idx in source_ids])

        assert corpus.num_issuers == 4
        for stored_idx in ('a', 'b', 'existing_issuer_1'):
            assert stored_idx in corpus.issuers.keys()

        # The mapping goes from the idx passed in to the imported issuer;
        # 'existing_issuer' is expected to end up as 'existing_issuer_1'.
        assert len(mapping) == 3
        expected_ids = (
            ('a', 'a'),
            ('b', 'b'),
            ('existing_issuer', 'existing_issuer_1'),
        )
        for source_idx, imported_idx in expected_ids:
            assert mapping[source_idx].idx == imported_idx
Ejemplo n.º 6
0
def corpus():
    """
    Build a corpus pre-filled with one track, one issuer and one utterance.

    Returns:
        audiomate.Corpus: Corpus containing ``existing_file``,
        ``existing_issuer`` and ``existing_utt``.
    """
    track = tracks.FileTrack('existing_file', '../any/path.wav')
    issuer = issuers.Issuer('existing_issuer')
    utterance = tracks.Utterance('existing_utt', track, issuer=issuer)

    # Entries are registered directly in the corpus containers.
    prepared = audiomate.Corpus()
    prepared.tracks['existing_file'] = track
    prepared.issuers['existing_issuer'] = issuer
    prepared.utterances['existing_utt'] = utterance

    return prepared
Ejemplo n.º 7
0
    def setup_method(self):
        """
        Create the label lists, track, issuer and utterance used as fixtures.

        ``ll_duplicate_idx`` and ``ll_3`` deliberately share the idx
        ``charlie``; both are handed to the utterance, in that order.
        """
        def build(idx, *entries):
            # Each entry is a (value, start, end) triple for one label.
            return annotations.LabelList(
                idx=idx,
                labels=[
                    annotations.Label(value, start, end)
                    for value, start, end in entries
                ])

        self.ll_1 = build('alpha',
                          ('a', 3.2, 4.5),
                          ('b', 5.1, 8.9),
                          ('c', 7.2, 10.5),
                          ('d', 10.5, 14),
                          ('d', 15, 18))

        self.ll_2 = build('bravo',
                          ('a', 1.0, 4.2),
                          ('e', 4.2, 7.9),
                          ('c', 7.2, 10.5),
                          ('f', 10.5, 14),
                          ('d', 15, 17.3))

        self.ll_duplicate_idx = build('charlie',
                                      ('t', 1.0, 4.2),
                                      ('h', 4.2, 7.9))

        self.ll_3 = build('charlie',
                          ('a', 1.0, 4.2),
                          ('g', 4.2, 7.9))

        wav_path = resources.sample_wav_file('wav_1.wav')
        self.track = tracks.FileTrack('wav', wav_path)
        self.issuer = issuers.Issuer('toni')

        all_label_lists = [
            self.ll_1, self.ll_2, self.ll_duplicate_idx, self.ll_3
        ]
        self.utt = tracks.Utterance('test',
                                    self.track,
                                    issuer=self.issuer,
                                    start=1.25,
                                    end=1.30,
                                    label_lists=all_label_lists)
Ejemplo n.º 8
0
    def new_issuer(self, issuer_idx, info=None):
        """
        Create a new issuer and register it in this dataset.

        Parameters:
            issuer_idx (str): The id to associate the issuer with. If an
                              issuer with that id already exists, the id is
                              replaced by the result of
                              ``naming.index_name_if_in_list``.
            info (dict, list): Additional info of the issuer.

        Returns:
            Issuer: The newly added issuer.
        """
        # NOTE(review): a ``None`` idx is not treated specially here; it is
        # used as given unless it is already a key -- confirm with callers.
        taken = self._issuers.keys()

        if issuer_idx in taken:
            unique_idx = naming.index_name_if_in_list(issuer_idx, taken)
        else:
            unique_idx = issuer_idx

        created = issuers.Issuer(unique_idx, info=info)
        self._issuers[unique_idx] = created

        return created
Ejemplo n.º 9
0
 def test_contains_issuer_returns_false(self, ds):
     """``contains_issuer`` is falsy for a freshly created ``Issuer('spk-1')``."""
     assert not ds.contains_issuer(issuers.Issuer('spk-1'))