Esempio n. 1
0
    def read_issuers(file_path, corpus):
        """
        Read issuers from the JSON file at ``file_path`` and import them
        into ``corpus``.

        The file is expected to hold a mapping of issuer-id to issuer data.
        Depending on the ``type`` entry an ``assets.Speaker``
        (``'speaker'``), an ``assets.Artist`` (``'artist'``) or a plain
        ``assets.Issuer`` (anything else, including a missing type) is
        created and handed to ``corpus.import_issuers``.

        If ``file_path`` is not an existing file, nothing is done.
        """
        if not os.path.isfile(file_path):
            return

        for idx, entry in jsonfile.read_json_file(file_path).items():
            kind = entry.get('type', None)
            info = entry.get('info', {})

            if kind == 'artist':
                issuer = assets.Artist(idx,
                                       name=entry.get('name', None),
                                       info=info)
            elif kind == 'speaker':
                # Missing gender/age-group fall back to 'unknown'; values
                # are lower-cased before being passed to the asset types.
                gender_value = entry.get('gender', 'unknown').lower()
                speaker_gender = assets.Gender(gender_value)
                age_value = entry.get('age_group', 'unknown').lower()
                speaker_age_group = assets.AgeGroup(age_value)

                issuer = assets.Speaker(
                    idx,
                    gender=speaker_gender,
                    age_group=speaker_age_group,
                    native_language=entry.get('native_language', None),
                    info=info)
            else:
                issuer = assets.Issuer(idx, info=info)

            corpus.import_issuers(issuer)
Esempio n. 2
0
    def get_audio_file_info(self, article_path):
        """
        Return info about the audio files of the article in ``article_path``.

        The ``audio_files`` entries of ``<article_path>/info.json`` are
        mapped to the paths of the corresponding audio files.

        Returns:
            dict: Audio file path -> offset (as stored in ``info.json``).
            With exactly one entry the path is ``<article_path>/audio.ogg``,
            otherwise ``<article_path>/audio1.ogg``, ``audio2.ogg``, ... in
            the order of the entries. An empty dict is returned if there
            are no entries, or if the only entry has no ``offset``.

        Raises:
            KeyError: If ``info.json`` has no ``audio_files`` entry, or if
                one of several entries has no ``offset``.
        """

        info_path = os.path.join(article_path, 'info.json')
        entries = jsonfile.read_json_file(info_path)['audio_files']

        if len(entries) == 1:
            only_entry = entries[0]

            # A single audio file without offset yields no info at all.
            if 'offset' not in only_entry:
                return {}

            single_path = os.path.join(article_path, 'audio.ogg')
            return {single_path: only_entry['offset']}

        # Multiple (or zero) audio files are numbered starting at 1.
        return {
            os.path.join(article_path, 'audio{}.ogg'.format(number)):
                entry['offset']
            for number, entry in enumerate(entries, start=1)
        }
Esempio n. 3
0
    def _load_list_of_invalid_utterances(self):
        """
        Return the content of ``data/<self.type()>/invalid_utterances.json``,
        looked up relative to the folder of this module.

        If that file does not exist, an empty list is returned.
        """
        module_dir = os.path.dirname(__file__)
        json_path = os.path.join(
            module_dir, 'data', self.type(), 'invalid_utterances.json')

        if not os.path.isfile(json_path):
            return []

        return jsonfile.read_json_file(json_path)
Esempio n. 4
0
    def test_save_issuers(self, writer, sample_corpus, tmpdir):
        """Saving the sample corpus must write the expected ``issuers.json``."""
        target_dir = tmpdir.strpath
        writer.save(sample_corpus, target_dir)

        issuers_path = os.path.join(target_dir, 'issuers.json')
        stored = jsonfile.read_json_file(issuers_path)

        # spk-3 is expected to be stored without any attributes.
        assert stored == {
            'spk-1': {'type': 'speaker', 'gender': 'male'},
            'spk-2': {'type': 'speaker', 'gender': 'female'},
            'spk-3': {},
        }
Esempio n. 5
0
 def load_speaker_meta(corpus_path):
     """
     Return the content of ``<corpus_path>/data/audioMNIST_meta.txt``,
     read as JSON.
     """
     return jsonfile.read_json_file(
         os.path.join(corpus_path, 'data', 'audioMNIST_meta.txt'))