Code example #1
    def __init__(self, name, vocab_file, ext='txt'):
        """Loads the vocabulary from `vocab_file` and builds a token-to-index map."""
        super(VocabSource, self).__init__(name, ext)
        self.vocab_file = vocab_file

        self.vocab = file_io.load_lines(self.vocab_file)
        self.vocab_map = {token: i for i, token in enumerate(self.vocab)}
        self.vocab_size = len(self.vocab)
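
Every example here revolves around `file_io.load_lines`. Its implementation is not shown; a minimal sketch of what such a helper presumably looks like (an assumption, not the actual tts_data_tools code):

def load_lines(file_path, encoding='utf-8'):
    """Loads a text file, returning its lines with trailing newlines stripped (sketch)."""
    with open(file_path, 'r', encoding=encoding) as f:
        return [line.rstrip('\n') for line in f]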
Code example #2
    def __init__(self, file_path):
        """Loads question set and prepares regexes for querying.

        Args:
            file_path (str): Question set to be loaded. Can be one of the provided question sets:
                questions-unilex_dnn_600.hed
                questions-radio_dnn_416.hed
                questions-radio_phones_48.hed
                questions-mandarin.hed
                questions-japanese.hed
        """
        if file_path in pkg_resources.resource_listdir(
                'tts_data_tools', 'resources/question_sets'):
            print(
                f'Using tts_data_tools resource from resources/question_sets for {file_path}'
            )
            # pkg_resources resource names are '/'-separated, regardless of platform.
            file_path = pkg_resources.resource_filename(
                'tts_data_tools', f'resources/question_sets/{file_path}')

        self.file_path = file_path
        self.lines = file_io.load_lines(self.file_path)
        # Ensure the only whitespace characters are single spaces.
        self.lines = [re.sub(r'\s+', ' ', line) for line in self.lines]

        self.binary_regexes, self.numerical_regexes = self.compile_questions(
            self.lines)
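
A hypothetical usage sketch; the enclosing class name (QuestionSet) is an assumption, while the question set name is one of those listed in the docstring:

# Hypothetical: assumes the enclosing class is called QuestionSet.
question_set = QuestionSet('questions-unilex_dnn_600.hed')
print(len(question_set.binary_regexes), len(question_set.numerical_regexes))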
Code example #3
def process_dir(festival_dir, txt_dir, id_list, out_dir, custom_voice=None):
    """Create Utterance structures for all sentences in `id_list` and save them to `out_dir`.

    Args:
        festival_dir (str): Directory containing the festival installation.
        txt_dir (str): Directory containing text transcriptions.
        id_list (str): Path to a file listing the file basenames to process.
        out_dir (str): Directory to save the output to.
        custom_voice (str): Name of a custom festival voice to use, if any.
    """
    file_ids = utils.get_file_ids(id_list=id_list)

    sentences = []

    # For all file_ids load the sentence and add a command to create and save the Utterance structure.
    for file_id in sorted(file_ids):
        sentence = file_io.load_lines(os.path.join(txt_dir,
                                                   f'{file_id}.txt'))[0]
        sentence = sentence.replace('"', '\\"')
        sentences.append(sentence)

    # If the file_ids are paths (e.g. for multi-speaker data), make sure the directory structure is already in place.
    utils.make_dirs(os.path.join(out_dir, 'utts'), file_ids)

    # Create and save the Utterance structures.
    create_utterances(festival_dir,
                      file_ids,
                      sentences,
                      out_dir,
                      custom_voice=custom_voice)
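
`utils.get_file_ids` appears throughout these examples; a minimal sketch of what it presumably does, assuming `id_list` is the path to a file with one basename per line:

def get_file_ids(id_list=None):
    """Loads file ids, one per line, from the id-list file (sketch; the real helper may differ)."""
    with open(id_list, 'r') as f:
        return [line.strip() for line in f if line.strip()]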
Code example #4
    def __init__(self,
                 file_path,
                 state_level=True,
                 states_per_phone=STATES_PER_PHONE):
        """Loads the label from `file_path` and processes basic information, preparing it for querying.

        Args:
            file_path (str): Label file to be loaded.
            state_level (bool): If True, the labels should be duplicated `self.states_per_phone` times per phone.
            states_per_phone (int): Number of states in a phone. If `self.state_level` is false, then this will be 1.
        """
        self.file_path = file_path
        self.base_name = os.path.splitext(os.path.basename(self.file_path))[0]
        self.state_level = state_level
        self.states_per_phone = states_per_phone if state_level else 1

        self.lines = file_io.load_lines(self.file_path)
        # Ensure all whitespace characters are single spaces.
        self.lines = [re.sub(r'\s+', ' ', line) for line in self.lines]

        # Extracted labels will not be duplicated for each state in the phone.
        self.labels = self.trim_labels(self.state_level)
        self.phones = self.extract_phone_identities()

        # If `self.state_level` is false, then each item in `self.state_in_phone_durations` will be a singleton list.
        self.state_in_phone_durations, self.phone_durations = self.extract_durations(
        )
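
Code example #10 below constructs this class as `lab_to_feat.Label`; a minimal usage sketch with a hypothetical label file:

label = lab_to_feat.Label('herald_001.lab', state_level=True)
print(label.phones)           # Phone identities, one per phone.
print(label.phone_durations)  # Per-phone durations, summed to frame counts in example #10.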
Code example #5
    def _full_to_mono(self,
                      full_file_name,
                      mono_file_name,
                      current_phone_regex=re.compile(r'-(.+?)\+')):
        """Extracts the current phone from each full-context label line and saves a mono-phone label."""
        phones = []

        label = file_io.load_lines(full_file_name)
        for line in label:
            phone = current_phone_regex.search(line).group(1)
            phones.append(phone)

        file_io.save_lines(phones, mono_file_name)

        return phones
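
The default regex extracts the current phone from between the '-' and '+' delimiters of a full-context label; a standalone sketch with a made-up label line:

import re

current_phone_regex = re.compile(r'-(.+?)\+')
# Illustrative quinphone context; real full-context labels carry many more features.
line = 'x^sil-ax+b=aw'
print(current_phone_regex.search(line).group(1))  # Prints 'ax'.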
Code example #6
    def load_file(self, base_name, data_dir):
        r"""Loads lines of text.

        Parameters
        ----------
        base_name : str
            The name (without extensions) of the file to be loaded.
        data_dir : str
            The directory containing all feature types for this dataset.

        Returns
        -------
        list of str
            The lines of the loaded file.
        """
        file_path = self.file_path(base_name, data_dir)
        return file_io.load_lines(file_path)
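
A hypothetical usage sketch; `source` is assumed to be an instance of the enclosing feature-source class, with `self.file_path` resolving the full path from the base name, the source's name, and its extension:

lines = source.load_file('herald_001', '/data/my_dataset')
print(lines)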
Code example #7
    def _add_alignments_to_lab(self, mlf, lab_align_dir, lab_dir, file_ids):
        """Merges state alignment times from `mlf` with the labels in `lab_dir`, saving state-aligned labels."""
        make_dirs(lab_align_dir, file_ids)

        with open(mlf, 'r') as f:
            # Consume the MLF header line ('#!MLF!#').
            _ = f.readline()

            for file_id in file_ids:
                # Consume the file name line.
                line = f.readline()

                mlf_base_name = os.path.splitext(os.path.basename(line))[0]
                id_base_name = os.path.basename(file_id)

                if mlf_base_name != id_base_name:
                    raise ValueError(
                        f'The file order in the MLF ({mlf}) does not match file_ids:\n'
                        f'{mlf_base_name} != {id_base_name}')

                label_no_align = file_io.load_lines(
                    os.path.join(lab_dir, f'{file_id}.lab'))

                label_state_align = []
                for label_tag in label_no_align:
                    label_tag = label_tag.strip()

                    for i in range(STATES_PER_PHONE):
                        # Consume a state alignment line.
                        line = f.readline().strip()

                        # Get the alignments for this state.
                        start_time, end_time, *_ = line.split()
                        label_state_align.append(
                            f'{start_time} {end_time} {label_tag}[{i + 2}]')

                # Save the state-aligned label.
                file_io.save_lines(
                    label_state_align,
                    os.path.join(lab_align_dir, f'{file_id}.lab'))

                # Consume the end-of-utterance marker ('.' character).
                line = f.readline().strip()

                if line != '.':
                    raise ValueError(
                        f'Expected end-of-utterance marker "." in the MLF for {file_id}')
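
For reference, a sketch of the MLF layout this parser expects, with illustrative times and scores and STATES_PER_PHONE assumed to be 5:

mlf_excerpt = '''#!MLF!#
"*/herald_001.lab"
0 200000 s2 -110.3
200000 400000 s3 -95.1
400000 600000 s4 -97.8
600000 800000 s5 -92.4
800000 1000000 s6 -101.6
.
'''

Each phone contributes STATES_PER_PHONE alignment lines, and each utterance is terminated by a lone '.' line.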
Code example #8
def sanitise_labs(lab_dir,
                  file_ids,
                  label_out_dir,
                  include_times=False,
                  state_level=False,
                  is_mono=False):
    """Cleans label files: normalises silences, optionally expanding phones to states and re-attaching times."""

    utils.make_dirs(label_out_dir, file_ids)

    for file_id in file_ids:
        label = file_io.load_lines(os.path.join(lab_dir, f'{file_id}.lab'))
        n_phones = len(label)

        start_times, end_times, label = map(list, zip(*map(str.split, label)))
        start_times, end_times, label = sanitise_silences(start_times,
                                                          end_times,
                                                          label,
                                                          is_mono=is_mono)

        if state_level:
            if include_times:
                n_states = n_phones * STATES_PER_PHONE

                times = np.interp(range(0, n_states + 1, 1),
                                  range(0, n_states + 1, STATES_PER_PHONE),
                                  start_times + end_times[-1:])

                start_times = times[:-1]
                end_times = times[1:]

            label = np.repeat(label, STATES_PER_PHONE).tolist()
            for i in range(len(label)):
                state_idx = i % STATES_PER_PHONE
                label[i] += f'[{state_idx+2}]'

        if include_times:
            start_times = list(map(_round_dur, start_times))
            end_times = list(map(_round_dur, end_times))

            label = list(map(' '.join, zip(start_times, end_times, label)))

        file_io.save_lines(label, os.path.join(label_out_dir,
                                               f'{file_id}.lab'))
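
The `np.interp` call above places evenly spaced state boundaries within each phone; a small numeric sketch (times in HTK 100 ns units):

import numpy as np

STATES_PER_PHONE = 5
start_times = [0, 500000]  # Two phones.
end_times = [500000, 1200000]
n_states = 2 * STATES_PER_PHONE

times = np.interp(range(0, n_states + 1, 1),
                  range(0, n_states + 1, STATES_PER_PHONE),
                  start_times + end_times[-1:])
print(times)  # 11 boundaries; each phone is divided into 5 equal-length states.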
Code example #9
def process_file(festival_dir, txt_file, out_dir, custom_voice=None):
    """Create Utterance structures for all sentences in `txt_file` and save them to `out_dir`.

    Args:
        festival_dir (str): Directory containing the festival installation.
        txt_file (str): File containing all transcriptions, one per line, with the following schema:
            (file_id, "sentence transcription")*
        out_dir (str): Directory to save the output to.
        custom_voice (str): Name of a custom festival voice to use, if any.
    """
    line_regex = re.compile(r'\(\s*'
                            r'(?P<file_id>.+)'
                            r'\s+'
                            r'"(?P<sentence>.+)"'
                            r'\s*\)')

    file_ids = []
    sentences = []

    # For all lines in txt_file extract file_id + sentence and add a command to create and save the Utterance structure.
    for line in file_io.load_lines(txt_file):

        match = line_regex.match(line)
        if match is None:
            print(f'No match found for the following line:\n{line}')
            continue

        file_id = match.group('file_id')
        file_ids.append(file_id)

        sentence = match.group('sentence')
        sentence = sentence.replace('"', '\\"')
        sentences.append(sentence)

    # Save the file_ids.
    file_io.save_lines(file_ids, os.path.join(out_dir, 'file_id_list.scp'))

    # Create and save the Utterance structures.
    create_utterances(festival_dir,
                      file_ids,
                      sentences,
                      out_dir,
                      custom_voice=custom_voice)
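
A sketch of `line_regex` against a typical festival utts.data line (hypothetical file id and sentence):

line = '( herald_001 "It was a bright cold day in April." )'
match = line_regex.match(line)
print(match.group('file_id'))   # herald_001
print(match.group('sentence'))  # It was a bright cold day in April.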
Code example #10
def process(lab_dir, id_list, out_dir, state_level, lab_dir_with_pos, wav_dir):
    """Processes label files in id_list, saves the phone identities (as a string) to text files.

    Args:
        lab_dir (str): Directory containing the label files.
        id_list (str): List of file basenames to process.
        out_dir (str): Directory to save the output to.
        state_level (bool): Indicates that the label files are state level if True, otherwise they are frame level.
    """
    file_ids = utils.get_file_ids(id_list=id_list)

    utils.make_dirs(os.path.join(out_dir, 'segment_n_phones'), file_ids)
    utils.make_dirs(os.path.join(out_dir, 'segment_n_frames'), file_ids)
    utils.make_dirs(os.path.join(out_dir, 'n_segments'), file_ids)

    for file_id in file_ids:
        lab_path_with_pos = os.path.join(lab_dir_with_pos, f'{file_id}.lab')
        label_with_pos = file_io.load_lines(lab_path_with_pos)

        word_start_idxs, _ = get_word_idxs(
            label_with_pos, word_idx_sep=(r'@', r'\+'), phrase_idx_sep=(r'@', r'='))
        pos_tags = get_pos_tags(label_with_pos, word_start_idxs)

        lab_path = os.path.join(lab_dir, f'{file_id}.lab')
        label = lab_to_feat.Label(lab_path, state_level)

        durations = label.phone_durations
        n_frames = np.sum(durations).item()
        n_phones = len(label.phones)

        word_start_idxs, word_end_idxs = get_word_idxs(
            label.labels, word_idx_sep=(r':', r'\+'), phrase_idx_sep=(r':', r'='))
        try:
            segment_start_idxs, segment_end_idxs = segment_words(word_start_idxs, word_end_idxs, pos_tags)
        except (ValueError, IndexError) as e:
            print(f'Skipping {file_id}: {e}')
        else:
            wav_path = os.path.join(wav_dir, f'{file_id}.wav')
            wav, sample_rate = file_io.load_wav(wav_path)
            f0, _, _, _ = world_with_reaper_f0.analysis(wav, sample_rate)

            # Match the number of frames between label forced-alignment and vocoder analysis.
            # Often the durations from forced alignment are a few frames longer than the vocoder features.
            diff = n_frames - f0.shape[0]
            if diff > n_phones:
                raise ValueError(f'Number of label frames and vocoder frames is too different for {file_id}\n'
                                 f'\tlabel frames {n_frames}\n'
                                 f'\tvocoder frames {f0.shape[0]}\n'
                                 f'\tnumber of phones {n_phones}')

            # Remove excess durations if there is a shape mismatch.
            if diff > 0:
                # Remove 1 frame from each phone's duration starting at the end of the sequence.
                durations[-diff:] -= 1
                n_frames = f0.shape[0]
                print(f'Cropped {diff} frames from durations for utterance {file_id}')

            assert n_frames == np.sum(durations).item()

            segment_phone_lens = []
            segment_frame_lens = []
            for segment_start_idx, segment_end_idx in zip(segment_start_idxs, segment_end_idxs):
                segment_phone_lens.append(segment_end_idx - segment_start_idx)
                segment_frame_lens.append(sum(durations[segment_start_idx:segment_end_idx]))

            file_io.save_txt(segment_phone_lens, os.path.join(out_dir, 'segment_n_phones', f'{file_id}.txt'))
            file_io.save_txt(segment_frame_lens, os.path.join(out_dir, 'segment_n_frames', f'{file_id}.txt'))
            file_io.save_txt(len(segment_phone_lens), os.path.join(out_dir, 'n_segments', f'{file_id}.txt'))
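
The cropping logic above removes one frame from each of the last `diff` phones; a small numeric sketch:

import numpy as np

durations = np.array([10, 12, 8, 9])  # Frames per phone from forced alignment.
n_vocoder_frames = 37                 # Vocoder analysis produced 2 fewer frames.
diff = durations.sum() - n_vocoder_frames
durations[-diff:] -= 1                # durations is now [10, 12, 7, 8].
assert durations.sum() == n_vocoder_frames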
Code example #11
    def train_hmm(self, niter, num_mix, num_splits=1):
        """Performs rounds of HERest estimation, doubling the number of mixture components until `num_mix` is reached.

        Args:
            niter (int): Number of HERest iterations to run per mixture setting.
            num_mix (int): Maximum number of Gaussian mixture components.
            num_splits (int): Number of chunks to split the training scp into for parallel HERest calls.
        """
        print('---training HMM models')

        if num_splits != 1:
            # Call HERest in multiple chunks, split scp in num_splits chunks and save them.
            print(f'----num_splits set to {num_splits}')

            train_scp_chunks = []

            mfc_files = file_io.load_lines(self.train_scp)
            random.shuffle(mfc_files)

            # Ceiling division, so the files are spread over at most `num_splits` chunks.
            n = (len(mfc_files) + num_splits - 1) // num_splits
            mfc_chunks = [
                mfc_files[j:j + n] for j in range(0, len(mfc_files), n)
            ]

            for i, mfc_chunk in enumerate(mfc_chunks):
                train_scp_chunk = os.path.join(self.cfg_dir, f'train_{i}.scp')
                train_scp_chunks.append(train_scp_chunk)

                file_io.save_lines(mfc_chunk, train_scp_chunk)

        done = False
        mix = 1
        while mix <= num_mix and not done:
            for i in range(niter):
                next_dir = os.path.join(self.model_dir,
                                        f'hmm_mix_{mix}_iter_{i+1}')
                os.makedirs(next_dir, exist_ok=True)

                if num_splits == 1:
                    subprocess.run([
                        self.HERest, '-C', self.cfg, '-S', self.train_scp,
                        '-I', self.phoneme_mlf, '-M', next_dir, '-H',
                        os.path.join(self.cur_dir, MACROS), '-H',
                        os.path.join(self.cur_dir,
                                     HMMDEFS), '-t', *PRUNING, self.phonemes
                    ],
                                   stdout=subprocess.PIPE,
                                   check=True)
                else:
                    procs = []
                    # Estimate per chunk.
                    for chunk_num in range(len(train_scp_chunks)):
                        procs.append(
                            subprocess.Popen([
                                self.HERest, '-C', self.cfg, '-S',
                                train_scp_chunks[chunk_num], '-I',
                                self.phoneme_mlf, '-M', next_dir, '-H',
                                os.path.join(self.cur_dir, MACROS), '-H',
                                os.path.join(self.cur_dir,
                                             HMMDEFS), '-t', *PRUNING, '-p',
                                str(chunk_num + 1), self.phonemes
                            ],
                                             stdout=subprocess.PIPE))

                    # Wait until all HERest calls are finished.
                    for p in procs:
                        p.wait()

                    # Now accumulate.
                    subprocess.run([
                        self.HERest, '-C', self.cfg, '-M', next_dir, '-H',
                        os.path.join(self.cur_dir, MACROS), '-H',
                        os.path.join(self.cur_dir,
                                     HMMDEFS), '-t', *PRUNING, '-p', '0',
                        self.phonemes, *glob.glob(next_dir + os.sep + "*.acc")
                    ],
                                   stdout=subprocess.PIPE,
                                   check=True)

                self.cur_dir = next_dir

            if mix * 2 <= num_mix:
                # Double the number of mixture components on the emitting states (2 to STATES_PER_PHONE + 1).
                hed_file = os.path.join(self.cfg_dir, f'mix_{mix * 2}.hed')
                with open(hed_file, 'w') as f:
                    f.write(
                        f'MU {mix * 2} {{*.state[2-{STATES_PER_PHONE + 1}].mix}}\n'
                    )

                next_dir = os.path.join(self.model_dir,
                                        f'hmm_mix_{mix * 2}_iter_0')
                os.makedirs(next_dir, exist_ok=True)

                subprocess.run([
                    self.HHEd, '-A', '-H',
                    os.path.join(self.cur_dir, MACROS), '-H',
                    os.path.join(self.cur_dir, HMMDEFS), '-M', next_dir,
                    hed_file, self.phonemes
                ],
                               check=True)

                self.cur_dir = next_dir
                mix *= 2

            else:
                done = True
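
A sketch of the scp chunking arithmetic with ceiling division, which guarantees at most `num_splits` chunks (hypothetical file names):

mfc_files = [f'utt_{i}.mfc' for i in range(10)]
num_splits = 3
n = (len(mfc_files) + num_splits - 1) // num_splits
chunks = [mfc_files[j:j + n] for j in range(0, len(mfc_files), n)]
print([len(c) for c in chunks])  # [4, 4, 2]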
Code example #12
    def make_proto(self):
        # Make the prototype HMM definition: zero means and unit variances over 39-dimensional features.
        means = ' '.join(['0.0' for _ in range(39)])
        variances = ' '.join(['1.0' for _ in range(39)])

        with open(self.proto, 'w') as f:
            f.write('~o <VECSIZE> 39 <USER>\n'
                    '~h "proto"\n'
                    '<BEGINHMM>\n'
                    '<NUMSTATES> 7\n')

            for i in range(2, STATES_PER_PHONE + 2):
                f.write(f'<STATE> {i}\n<MEAN> 39\n{means}\n')
                f.write(f'<VARIANCE> 39\n{variances}\n')

            # 7x7 left-to-right transition matrix with self-loops; rows 2-6 are the emitting states.
            f.write('<TRANSP> 7\n'
                    ' 0.0 1.0 0.0 0.0 0.0 0.0 0.0\n'
                    ' 0.0 0.6 0.4 0.0 0.0 0.0 0.0\n'
                    ' 0.0 0.0 0.6 0.4 0.0 0.0 0.0\n'
                    ' 0.0 0.0 0.0 0.6 0.4 0.0 0.0\n'
                    ' 0.0 0.0 0.0 0.0 0.6 0.4 0.0\n'
                    ' 0.0 0.0 0.0 0.0 0.0 0.7 0.3\n'
                    ' 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'
                    '<ENDHMM>\n')

        # Make vFloors
        subprocess.run([
            self.HCompV, '-f', F, '-C', self.cfg, '-S', self.train_scp, '-M',
            self.cur_dir, self.proto
        ],
                       check=True)

        # Make local macro.
        with open(os.path.join(self.cur_dir, MACROS), 'w') as f:

            # Get first three lines from local proto.
            with open(os.path.join(self.cur_dir, 'proto'), 'r') as source:
                for _ in range(3):
                    f.write(source.readline())

            # Get remaining lines from vFloors.
            with open(os.path.join(self.cur_dir, VFLOORS), 'r') as source:
                f.writelines(source.readlines())

        # Make hmmdefs.
        with open(os.path.join(self.cur_dir, HMMDEFS), 'w') as f:

            with open(self.proto, 'r') as source:
                # Ignore first two lines.
                source.readline()
                source.readline()

                source_lines = source.readlines()

            phone_set = file_io.load_lines(self.phonemes)
            for phone in phone_set:
                # The header.
                f.write(f'~h "{phone}"\n')

                # The rest.
                f.writelines(source_lines)