# Example 1
    def read_labels(path, corpus):
        """
        Read every label file found in ``path`` and attach the resulting
        label lists to the matching utterances of ``corpus``.
        """
        prefix = '{}_'.format(LABEL_FILE_PREFIX)

        for label_file in glob.glob(
                os.path.join(path, '{}*.txt'.format(prefix))):
            file_name = os.path.basename(label_file)
            # Strip the file prefix and the '.txt' extension to get the
            # key under which the label lists are stored.
            key = file_name[len(prefix):len(file_name) - len('.txt')]

            by_utterance = collections.defaultdict(list)

            records = textfile.read_separated_lines_generator(
                label_file, separator=' ', max_columns=4)

            for record in records:
                value = record[3]
                start = float(record[1])
                end = float(record[2])
                meta = None

                match = META_PATTERN.match(value)

                if match is not None:
                    # The label carries trailing JSON metadata; split it off.
                    meta = json.loads(match.group(2))
                    value = match.group(1)

                by_utterance[record[0]].append(
                    assets.Label(value, start, end, meta=meta))

            for utt_idx, utt_labels in by_utterance.items():
                label_list = assets.LabelList(idx=key, labels=utt_labels)
                corpus.utterances[utt_idx].set_label_list(label_list)
    def load_subset(corpus, path, subset_idx):
        """
        Load subset into corpus.

        Reads the TSV file of the subset, creates the needed assets for
        every entry and imports the collected utterance-ids as a subview
        named ``subset_idx``.

        Args:
            corpus: Corpus to load the subset into.
            path (str): Base path of the dataset.
            subset_idx (str): Name of the subset (e.g. 'train').
        """
        csv_file = os.path.join(path, '{}.tsv'.format(subset_idx))
        subset_utt_ids = []

        entries = textfile.read_separated_lines_generator(
            csv_file,
            separator='\t',
            max_columns=8,
            ignore_lines_starting_with=['client_id'],
            keep_empty=True
        )

        for entry in entries:

            file_idx = CommonVoiceReader.create_assets_if_needed(
                corpus,
                path,
                entry
            )
            subset_utt_ids.append(file_idx)

        # Named utt_filter to avoid shadowing the builtin ``filter``.
        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(subset_utt_ids))
        subview = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(subset_idx, subview)
# Example 3
    def _load(self, path):
        """
        Load the corpus rooted at ``path``.

        Reads the tab-separated meta file and creates one file, one
        utterance and (on first occurrence) one speaker per record.

        Args:
            path (str): Path to the corpus folder.

        Returns:
            audiomate.Corpus: The loaded corpus.
        """
        corpus = audiomate.Corpus(path=path)

        meta_file = os.path.join(path, META_FILENAME)
        records = textfile.read_separated_lines_generator(meta_file,
                                                          separator='\t',
                                                          max_columns=4)

        for record in records:
            idx = record[0]
            speaker_idx = record[1]
            language = record[2]
            transcript = record[3]

            # Audio files are grouped into per-language subfolders.
            file_path = os.path.join(path, 'audio', language,
                                     '{}.mp3'.format(idx))
            corpus.new_file(file_path, idx)

            # Membership test on the mapping itself; no need for .keys().
            if speaker_idx not in corpus.issuers:
                issuer = issuers.Speaker(speaker_idx)
                corpus.import_issuers(issuer)

            utterance = corpus.new_utterance(idx, idx, speaker_idx)
            utterance.set_label_list(
                annotations.LabelList.create_single(
                    transcript, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        return corpus
# Example 4
    def load_speakers(path):
        """
        Parse the pipe-separated speaker listing at ``path``.

        Returns:
            dict: Mapping of speaker-idx to ``issuers.Speaker``.
        """
        # Any value other than 'M'/'F' maps to UNKNOWN.
        gender_by_code = {
            'M': issuers.Gender.MALE,
            'F': issuers.Gender.FEMALE,
        }

        entries = textfile.read_separated_lines_generator(
            path,
            separator='|',
            max_columns=5,
            ignore_lines_starting_with=[';'])

        speakers = {}

        for entry in entries:
            speaker_idx = entry[0].strip()
            gender = gender_by_code.get(entry[1].strip(),
                                        issuers.Gender.UNKNOWN)
            speakers[speaker_idx] = issuers.Speaker(speaker_idx,
                                                    gender=gender)

        return speakers
# Example 5
    def _load_audio_list(self, path):
        """
        Load and filter the audio list.

        Args:
            path (str): Path to the audio list file.

        Returns:
            dict: Dictionary of filtered sentences
                  (id : username, license, attribution-url)
        """
        result = {}

        entries = textfile.read_separated_lines_generator(
            path, separator='\t', max_columns=4)

        for raw in entries:
            # '\N' marks a missing value; normalize those to None and pad
            # every record to exactly four columns.
            entry = [None if field == '\\N' else field for field in raw]
            entry += [None] * (4 - len(entry))

            licence = entry[2]

            if licence is None and not self.include_empty_licence:
                continue

            if (self.include_licenses is not None
                    and licence not in self.include_licenses):
                continue

            result[entry[0]] = entry[1:]

        return result
# Example 6
def read_label_file(path):
    """
    Read the labels from an audacity label file.

    Args:
        path (str): Path to the label file.

    Returns:
        list: List of labels (start [sec], end [sec], label)

    Example::

        >>> read_label_file('/path/to/label/file.txt')
        [
            [0.0, 0.2, 'sie'],
            [0.2, 2.2, 'hallo']
        ]
    """
    records = textfile.read_separated_lines_generator(path,
                                                      separator='\t',
                                                      max_columns=3)

    labels = []

    for record in records:
        start = float(_clean_time(record[0]))
        end = float(_clean_time(record[1]))
        # The label text column is optional; default to an empty string.
        text = str(record[2]) if len(record) > 2 else ''

        labels.append([start, end, text])

    return labels
# Example 7
    def load_subset(corpus, path, subset_idx):
        """
        Load subset into corpus.

        Reads the CSV file of the given subset, creates a file, speaker
        and utterance for every record, and imports the collected
        utterances as a subview named ``subset_idx``.

        Args:
            corpus: Corpus to load the subset into.
            path (str): Base path of the dataset.
            subset_idx (str): Name of the subset (e.g. 'train').
        """
        csv_file = os.path.join(path, '{}.csv'.format(subset_idx))
        utt_ids = []

        for entry in textfile.read_separated_lines_generator(
                csv_file,
                separator=',',
                max_columns=8,
                ignore_lines_starting_with=['filename']):
            rel_file_path = entry[0]
            filename = os.path.split(rel_file_path)[1]
            basename = os.path.splitext(filename)[0]
            transcription = entry[1]
            age = CommonVoiceReader.map_age(entry[4])
            gender = CommonVoiceReader.map_gender(entry[5])

            # Prefix with the subset name so ids are unique across subsets.
            idx = '{}-{}'.format(subset_idx, basename)
            file_path = os.path.join(path, rel_file_path)

            corpus.new_file(file_path, idx)
            issuer = assets.Speaker(idx, gender=gender, age_group=age)
            corpus.import_issuers(issuer)
            utterance = corpus.new_utterance(idx, idx, issuer.idx)
            utterance.set_label_list(
                assets.LabelList.create_single(
                    transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

            utt_ids.append(idx)

        # Named utt_filter to avoid shadowing the builtin ``filter``.
        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(utt_ids))
        subview = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(subset_idx, subview)
# Example 8
    def load_transcripts(path):
        """
        Read the space-separated transcript file at ``path``.

        Returns:
            dict: Mapping of utterance-id to transcription.
        """
        lines = textfile.read_separated_lines_generator(
            path,
            separator=' ',
            max_columns=2
        )

        transcripts = {}

        for parts in lines:
            transcripts[parts[0].strip()] = parts[1].strip()

        return transcripts
# Example 9
def read_training_transcripts(path):
    """
    Read all transcripts from the training CSV at ``path``.

    The header line (starting with 'wav_filename') is skipped.

    Args:
        path (str): Path to the CSV file.

    Returns:
        list: The transcript (third column) of every record.
    """
    # Comprehension instead of a manual append loop.
    return [
        entry[2]
        for entry in textfile.read_separated_lines_generator(
            path,
            separator=',',
            max_columns=3,
            ignore_lines_starting_with=['wav_filename'])
    ]
# Example 10
def read_training_transcripts(path):
    """
    Read all transcripts from the training CSV at ``path``,
    showing a progress bar while iterating.

    Args:
        path (str): Path to the CSV file.

    Returns:
        list: The transcript (third column) of every record.
    """
    entries = textfile.read_separated_lines_generator(
        path,
        separator=",",
        max_columns=3,
        ignore_lines_starting_with=["wav_filename"])

    # Materialize the generator first so tqdm can show a total count.
    transcripts = [record[2] for record in tqdm.tqdm(list(entries))]

    return transcripts
# Example 11
    def load_speakers(path):
        """
        Read the speaker CSV at ``path``.

        Returns:
            dict: Mapping of speaker-idx to the parsed speaker record.
        """
        records = textfile.read_separated_lines_generator(
            path,
            separator=',',
            max_columns=6,
            ignore_lines_starting_with=['speakerId'])

        parsed = (FluentSpeechReader.parse_speaker_record(record)
                  for record in records)

        return {speaker.idx: speaker for speaker in parsed}
# Example 12
    def load_part(base_path, part_name, corpus, speakers):
        """
        Load one part (e.g. train/valid/test) of the dataset into ``corpus``.

        Args:
            base_path (str): Base path of the dataset.
            part_name (str): Name of the part; used for the CSV file name
                             and for the resulting subview.
            corpus: Corpus to load the part into.
            speakers (dict): Mapping of speaker-idx to speaker, imported
                             into the corpus on first occurrence.
        """
        part_file_path = os.path.join(base_path, 'data',
                                      '{}_data.csv'.format(part_name))
        entries = textfile.read_separated_lines_generator(
            part_file_path,
            separator=',',
            max_columns=7,
            ignore_lines_starting_with=[','])

        part_ids = []

        for entry in entries:
            file_path = entry[1]
            file_base = os.path.basename(file_path)
            idx = os.path.splitext(file_base)[0]
            speaker_idx = entry[2]
            part_ids.append(idx)

            # Import each speaker only once (membership on the mapping).
            if speaker_idx not in corpus.issuers:
                corpus.import_issuers(speakers[speaker_idx])

            track = corpus.new_file(os.path.join(base_path, file_path), idx)

            utt = corpus.new_utterance(idx, track.idx, speaker_idx)

            transcription = annotations.LabelList.create_single(
                entry[3], idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
            utt.set_label_list(transcription)

            # Columns 4-6 hold optional intent slots; the literal value
            # 'none' means the slot is absent.
            for column, ll_idx in ((4, 'action'), (5, 'object'),
                                   (6, 'location')):
                if entry[column] != 'none':
                    utt.set_label_list(annotations.LabelList.create_single(
                        entry[column], idx=ll_idx))

        # Named part_filter to avoid shadowing the builtin ``filter``.
        part_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(part_ids))
        subview = subset.Subview(corpus, filter_criteria=[part_filter])
        corpus.import_subview(part_name, subview)
# Example 13
    def _load_sentence_list(self, path):
        """
        Load and filter the sentence list.

        Args:
            path (str): Path to the sentence list.

        Returns:
            dict: Dictionary of sentences (id : language, transcription)
        """
        sentences = {}
        wanted_languages = self.include_languages

        entries = textfile.read_separated_lines_generator(
            path, separator='\t', max_columns=3)

        for entry in entries:
            # Skip entries whose language (second column) is not among
            # the requested ones; no filter means keep everything.
            if wanted_languages is not None and entry[1] not in wanted_languages:
                continue

            sentences[entry[0]] = entry[1:]

        return sentences
# Example 14
def read_file(path):
    """
    Reads a ctm file.

    Args:
        path (str): Path to the file

    Returns:
        (dict): Dictionary with entries.

    Example::

        >>> read_file('/path/to/file.txt')
        {
            'wave-ab': [
                ['1', 0.00, 0.07, 'HI', 1],
                ['1', 0.09, 0.08, 'AH', 1]
            ],
            'wave-xy': [
                ['1', 0.00, 0.07, 'HI', 1],
                ['1', 0.09, 0.08, 'AH', 1]
            ]
        }
    """
    gen = textfile.read_separated_lines_generator(
        path, max_columns=6, ignore_lines_starting_with=[';;'])

    utterances = collections.defaultdict(list)

    for record in gen:
        # Drop the utterance-id column; the rest form the entry values.
        values = record[1:]

        # Begin time, duration and confidence are numeric columns.
        for i, value in enumerate(values):
            if i in (1, 2, 4):
                values[i] = float(value)

        utterances[record[0]].append(values)

    return utterances
# Example 15
def load_projections(projections_file):
    """
    Loads projections defined in the given `projections_file`.

    The `projections_file` is expected to be in the following format::

        old_label_1 | new_label_1
        old_label_1 old_label_2 | new_label_2
        old_label_3 |

    You can define one projection per line. Each projection starts with a list of one or multiple
    old labels (separated by a single whitespace) that are separated from the new label by a pipe
    (`|`). In the code above, the segment labeled with `old_label_1` will be labeled with
    `new_label_1` after applying the projection. Segments that are labeled with `old_label_1`
    **and** `old_label_2` concurrently are relabeled to `new_label_2`. All segments labeled with
    `old_label_3` are dropped. Combinations of multiple labels are automatically sorted in natural
    order.

    Args:
        projections_file (str): Path to the file with projections

    Returns:
        dict: Dictionary where the keys are tuples of labels to project to the key's value

    Example:
        >>> load_projections('/path/to/projections.txt')
        {('b',): 'foo', ('a', 'b'): 'a_b', ('a',): 'bar'}
    """
    projections = {}

    for parts in textfile.read_separated_lines_generator(
            projections_file, '|'):
        # The key is the sorted tuple of stripped source labels.
        old_labels = [label.strip() for label in parts[0].split(' ')]
        combination = tuple(sorted(old_labels))

        projections[combination] = parts[1].strip()

    return projections