def read_issuers(file_path, corpus):
    """
    Read issuer definitions from a file and import them into a corpus.

    The file is loaded with ``jsonfile.read_json_file`` and is expected to
    yield a mapping of issuer id to a dict describing that issuer. The
    ``type`` entry selects the issuer class: ``'speaker'`` builds a
    ``Speaker`` (reading ``gender``, ``age_group`` and ``native_language``),
    ``'artist'`` builds an ``Artist`` (reading ``name``), and anything else
    builds a plain ``Issuer``. The ``info`` entry (default: empty dict) is
    passed on in all three cases.

    Parameters:
        file_path (str): Path of the file to read. If it is not an existing
                         file, nothing is read and nothing is imported.
        corpus: Object the issuers are handed to, one at a time, via
                ``corpus.import_issuers``.
    """
    if not os.path.isfile(file_path):
        return

    for idx, entry in jsonfile.read_json_file(file_path).items():
        kind = entry.get('type', None)
        info = entry.get('info', {})

        if kind == 'speaker':
            # Gender and age group are lower-cased before enum lookup.
            speaker_gender = issuers.Gender(entry.get('gender', 'unknown').lower())
            speaker_age = issuers.AgeGroup(entry.get('age_group', 'unknown').lower())
            language = entry.get('native_language', None)
            issuer = issuers.Speaker(
                idx,
                gender=speaker_gender,
                age_group=speaker_age,
                native_language=language,
                info=info,
            )
        elif kind == 'artist':
            artist_name = entry.get('name', None)
            issuer = issuers.Artist(idx, name=artist_name, info=info)
        else:
            issuer = issuers.Issuer(idx, info=info)

        corpus.import_issuers(issuer)
def generate_issuers(n, rand=None):
    """
    Create a list of issuers whose concrete kind is picked at random.

    For every issuer one number in ``[1, 3]`` is drawn: 1 gives a
    ``Speaker`` (unknown gender, age group child, native language ``'de'``),
    2 gives an ``Artist`` named ``'badam'``, 3 gives a plain ``Issuer``.
    Ids have the form ``issuer-<index>`` with the index starting at 0.

    Parameters:
        n (int): Number of issuers to create.
        rand (random.Random): Random number source. If None, a new
                              ``random.Random()`` is created.

    Returns:
        list: The created issuers, in creation order.
    """
    rng = random.Random() if rand is None else rand
    generated = []

    for position in range(n):
        idx = 'issuer-{}'.format(position)
        kind = rng.randint(1, 3)

        if kind == 1:
            created = issuers.Speaker(
                idx,
                gender=issuers.Gender.UNKNOWN,
                age_group=issuers.AgeGroup.CHILD,
                native_language='de'
            )
        elif kind == 2:
            created = issuers.Artist(idx, 'badam')
        else:
            created = issuers.Issuer(idx)

        generated.append(created)

    return generated
def test_read_samples(self):
    """
    Check the samples a label reads against a direct load of the wav file.

    The utterance starts at 1.0 s in the track, so the expected audio for
    each label is loaded at ``1.0 + label start`` with the label's length
    as duration.
    """
    wav_path = resources.sample_wav_file('wav_1.wav')
    wav_track = tracks.FileTrack('wav', wav_path)
    speaker = issuers.Issuer('toni')
    utterance = tracks.Utterance('t', wav_track, issuer=speaker, start=1.0, end=2.30)

    first_label = annotations.Label('a', 0.15, 0.448)
    second_label = annotations.Label('a', 0.5, 0.73)
    label_list = annotations.LabelList(labels=[first_label, second_label])
    utterance.set_label_list(label_list)

    # First label: 0.15 s - 0.448 s inside the utterance.
    first_expected, _rate = librosa.core.load(wav_path, sr=None, offset=1.15, duration=0.298)
    assert np.array_equal(first_label.read_samples(), first_expected)

    # Second label: 0.5 s - 0.73 s inside the utterance.
    second_expected, _rate = librosa.core.load(wav_path, sr=None, offset=1.5,
                                               duration=1.73 - 1.5)
    print(second_expected.shape)
    print(second_label.read_samples().shape)
    assert np.array_equal(second_label.read_samples(), second_expected)
def test_import_utterance_no_issuer(self, corpus):
    """
    Importing an utterance must raise ``ValueError`` when it references an
    issuer (``'notexist'``) that this test never adds to the corpus.
    """
    known_track = corpus.tracks['existing_file']
    unknown_issuer = issuers.Issuer('notexist')
    orphan_utterance = tracks.Utterance('a', known_track, unknown_issuer, 0, 10)

    with pytest.raises(ValueError):
        corpus.import_utterances([orphan_utterance])
def test_import_issuers(self, corpus):
    """
    Import three issuers, one of which uses the id ``'existing_issuer'``.

    Expects four issuers in the corpus afterwards, the clashing one stored
    as ``'existing_issuer_1'``, and a returned mapping from each original id
    to the issuer as it was imported.
    """
    new_issuers = [issuers.Issuer(idx) for idx in ('a', 'b', 'existing_issuer')]

    mapping = corpus.import_issuers(new_issuers)

    assert corpus.num_issuers == 4
    for stored_idx in ('a', 'b', 'existing_issuer_1'):
        assert stored_idx in corpus.issuers.keys()

    assert len(mapping) == 3
    expected_idx_by_original = (
        ('a', 'a'),
        ('b', 'b'),
        ('existing_issuer', 'existing_issuer_1'),
    )
    for original_idx, imported_idx in expected_idx_by_original:
        assert mapping[original_idx].idx == imported_idx
def corpus():
    """
    Build a corpus that holds exactly one track, one issuer and one
    utterance (the utterance refers to that track and issuer).

    Returns:
        The populated ``audiomate.Corpus``.
    """
    populated = audiomate.Corpus()

    existing_track = tracks.FileTrack('existing_file', '../any/path.wav')
    existing_issuer = issuers.Issuer('existing_issuer')
    existing_utt = tracks.Utterance('existing_utt', existing_track, issuer=existing_issuer)

    # Entries are placed directly into the corpus containers, keyed by id.
    populated.tracks['existing_file'] = existing_track
    populated.issuers['existing_issuer'] = existing_issuer
    populated.utterances['existing_utt'] = existing_utt

    return populated
def setup_method(self):
    """
    Prepare the label lists, track, issuer and utterance used by the tests.

    Four label lists are created and all of them are handed to the
    utterance. ``ll_duplicate_idx`` and ``ll_3`` deliberately share the idx
    ``'charlie'``.
    NOTE(review): how ``Utterance`` resolves the duplicate idx is not
    visible here - presumably the later list wins; confirm.
    """
    def build_label_list(idx, *label_specs):
        # Each spec is (value, start, end), passed positionally to Label.
        return annotations.LabelList(
            idx=idx,
            labels=[annotations.Label(*spec) for spec in label_specs]
        )

    self.ll_1 = build_label_list(
        'alpha',
        ('a', 3.2, 4.5),
        ('b', 5.1, 8.9),
        ('c', 7.2, 10.5),
        ('d', 10.5, 14),
        ('d', 15, 18),
    )
    self.ll_2 = build_label_list(
        'bravo',
        ('a', 1.0, 4.2),
        ('e', 4.2, 7.9),
        ('c', 7.2, 10.5),
        ('f', 10.5, 14),
        ('d', 15, 17.3),
    )
    self.ll_duplicate_idx = build_label_list(
        'charlie',
        ('t', 1.0, 4.2),
        ('h', 4.2, 7.9),
    )
    self.ll_3 = build_label_list(
        'charlie',
        ('a', 1.0, 4.2),
        ('g', 4.2, 7.9),
    )

    self.track = tracks.FileTrack('wav', resources.sample_wav_file('wav_1.wav'))
    self.issuer = issuers.Issuer('toni')

    all_label_lists = [self.ll_1, self.ll_2, self.ll_duplicate_idx, self.ll_3]
    self.utt = tracks.Utterance(
        'test',
        self.track,
        issuer=self.issuer,
        start=1.25,
        end=1.30,
        label_lists=all_label_lists
    )
def new_issuer(self, issuer_idx, info=None):
    """
    Create an issuer and register it in this dataset.

    Parameters:
        issuer_idx (str): The id to associate the issuer with. If an issuer
                          with this id is already registered, another id is
                          derived from it with
                          ``naming.index_name_if_in_list``.
        info (dict, list): Additional info of the issuer.

    Returns:
        Issuer: The newly added issuer.
    """
    # NOTE(review): None gets no special treatment here; unless it is
    # already a key, it is used as the id unchanged. Confirm whether an id
    # should be generated in that case.
    taken_ids = self._issuers.keys()

    if issuer_idx in taken_ids:
        # Id clash: ask the naming helper for a variant that is still free.
        unique_idx = naming.index_name_if_in_list(issuer_idx, taken_ids)
    else:
        unique_idx = issuer_idx

    created = issuers.Issuer(unique_idx, info=info)
    self._issuers[unique_idx] = created

    return created
def test_contains_issuer_returns_false(self, ds):
    """
    ``contains_issuer`` must give a falsy result for a freshly created
    issuer (``'spk-1'``) that this test never adds to ``ds``.
    """
    unknown_issuer = issuers.Issuer('spk-1')

    assert not ds.contains_issuer(unknown_issuer)