예제 #1
0
 def __init__(self,
              annotation_tiers,
              hierarchy,
              make_transcription=True,
              stop_check=None,
              call_back=None):
     super(AlignerParser,
           self).__init__(annotation_tiers, hierarchy, make_transcription,
                          False, stop_check, call_back)
     self.speaker_parser = DirectorySpeakerParser()
예제 #2
0
class AlignerParser(TextgridParser):
    word_label = 'word'
    phone_label = 'phone'
    name = 'aligner'
    speaker_first = True
    def __init__(self, annotation_types, hierarchy, make_transcription=True,
                 make_label=False,
                 stop_check=None, call_back=None):
        super(AlignerParser, self).__init__(annotation_types, hierarchy, make_transcription,
                                        make_label, stop_check, call_back)
        self.speaker_parser = DirectorySpeakerParser()

    def _is_valid(self, tg):
        found_word = False
        found_phone = False
        invalid = True
        multiple_speakers = False
        for ti in tg.tiers:
            if ' - ' in ti.name:
                multiple_speakers = True
                break
        if multiple_speakers:
            if self.speaker_first:
                speakers = {x.name.split(' - ')[0].strip() for x in tg.tiers if ' - ' in x.name}
            else:
                speakers = {x.name.split(' - ')[1].strip() for x in tg.tiers if ' - ' in x.name}
            found_words = {x: False for x in speakers}
            found_phones = {x: False for x in speakers}
            for ti in tg.tiers:
                if ' - ' not in ti.name:
                    continue
                if self.speaker_first:
                    speaker, name = ti.name.split(' - ')
                else:
                    name, speaker = ti.name.split(' - ')
                speaker = speaker.strip()
                name = name.strip()
                if name.lower().startswith(self.word_label):
                    found_words[speaker] = True
                elif name.lower().startswith(self.phone_label):
                    found_phones[speaker] = True
            found_word = all(found_words.values())
            found_phone = all(found_words.values())
        else:
            for ti in tg.tiers:
                if ti.name.lower().startswith(self.word_label):
                    found_word = True
                elif ti.name.lower().startswith(self.phone_label):
                    found_phone = True
        return multiple_speakers, found_word and found_phone

    def parse_discourse(self, path, types_only=False):
        '''
        Parse a TextGrid file for later importing.

        Parameters
        ----------
        path : str
            Path to TextGrid file

        Returns
        -------
        :class:`~polyglotdb.io.discoursedata.DiscourseData`
            Parsed data from the file
        '''
        tg = TextGrid()
        try:
            tg.read(path)
        except Exception as e:
            print('There was an issue parsing {}:'.format(path))
            raise

        multiple_speakers, is_valid = self._is_valid(tg)

        if not is_valid:
            raise (TextGridError('This file ({}) cannot be parsed by the {} parser.'.format(path, self.name)))
        name = os.path.splitext(os.path.split(path)[1])[0]

        # Format 1
        if not multiple_speakers:
            if self.speaker_parser is not None:
                speaker = self.speaker_parser.parse_path(path)
            else:
                speaker = None

            for a in self.annotation_types:
                a.reset()
                a.speaker = speaker

            # Parse the tiers
            for i, ti in enumerate(tg.tiers):
                if ti.name.lower().startswith(self.word_label):
                    self.annotation_types[0].add(((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
                elif ti.name.lower().startswith(self.phone_label):
                    self.annotation_types[1].add(((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
            pg_annotations = self._parse_annotations(types_only)

            data = DiscourseData(name, pg_annotations, self.hierarchy)
            for a in self.annotation_types:
                a.reset()

        # Format 2
        else:
            dummy = self.annotation_types
            self.annotation_types = []
            wav_path = find_wav_path(path)
            speaker_channel_mapping = {}
            if wav_path is not None:
                n_channels = get_n_channels(wav_path)
                if n_channels > 1:
                    # Figure speaker-channel mapping
                    n_tiers = 0
                    for ti in tg.tiers:
                        try:
                            speaker, type = ti.name.split(' - ')
                        except ValueError:
                            continue
                        n_tiers += 1
                    ind = 0
                    cutoffs = [x / n_channels for x in range(1, n_channels)]
                    for ti in tg.tiers:
                        try:
                            if self.speaker_first:
                                speaker, type = ti.name.split(' - ')
                            else:
                                type, speaker = ti.name.split(' - ')
                        except ValueError:
                            continue
                        if speaker in speaker_channel_mapping:
                            continue
                        for i, c in enumerate(cutoffs):
                            if ind / n_channels < c:
                                speaker_channel_mapping[speaker] = i
                                break
                        else:
                            speaker_channel_mapping[speaker] = i + 1
                        ind += 1

            # Parse the tiers
            for ti in tg.tiers:
                try:
                    if self.speaker_first:
                        speaker, type = ti.name.split(' - ')
                    else:
                        type, speaker = ti.name.split(' - ')
                except ValueError:
                    continue
                if type.lower().startswith(self.word_label):
                    type = 'word'
                elif type.lower().startswith(self.phone_label):
                    type = 'phone'
                if len(ti) == 1 and ti[0].mark.strip() == '':
                    continue
                at = OrthographyTier(type, type)
                at.speaker = speaker
                at.add(((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
                self.annotation_types.append(at)
            pg_annotations = self._parse_annotations(types_only)
            data = DiscourseData(name, pg_annotations, self.hierarchy)
            data.speaker_channel_mapping = speaker_channel_mapping

            self.annotation_types = dummy

        data.wav_path = find_wav_path(path)
        return data
예제 #3
0
def test_directory_parsing(buckeye_test_dir):
    path = os.path.join(buckeye_test_dir, 'test.words')
    parser = DirectorySpeakerParser()
    name = parser.parse_path(path)
    assert (name == 'buckeye')
예제 #4
0
class AlignerParser(TextgridParser):
    """
    Base class for parsing TextGrid output from forced aligners.

    Parameters
    ----------
    annotation_tiers : list
        List of the annotation tiers to store data from the TextGrid
    hierarchy : Hierarchy
        Basic hierarchy of the TextGrid
    make_transcription : bool
        Flag for whether to add a transcription property to words based on phones they contain
    stop_check : callable
        Function to check for whether parsing should stop
    call_back : callable
        Function to report progress in parsing

    Attributes
    ----------
    word_label : str
        Label identifying word tiers
    phone_label : str
        Label identifying phone tiers
    name : str
        Name of the aligner the TextGrids are from
    speaker_first : bool
        Whether speaker names precede tier types in the TextGrid when multiple speakers are present
    """
    word_label = 'word'
    phone_label = 'phone'
    name = 'aligner'
    speaker_first = True

    def __init__(self,
                 annotation_tiers,
                 hierarchy,
                 make_transcription=True,
                 stop_check=None,
                 call_back=None):
        super(AlignerParser,
              self).__init__(annotation_tiers, hierarchy, make_transcription,
                             False, stop_check, call_back)
        self.speaker_parser = DirectorySpeakerParser()

    def _is_valid(self, tg):
        found_word = False
        found_phone = False
        invalid = True
        multiple_speakers = False
        for ti in tg.tiers:
            if ' - ' in ti.name:
                multiple_speakers = True
                break
        if multiple_speakers:
            if self.speaker_first:
                speakers = {
                    x.name.split(' - ')[0].strip().replace('/', '_').replace(
                        '\\', '_')
                    for x in tg.tiers if ' - ' in x.name
                }
            else:
                speakers = {
                    x.name.split(' - ')[1].strip().replace('/', '_').replace(
                        '\\', '_')
                    for x in tg.tiers if ' - ' in x.name
                }
            found_words = {x: False for x in speakers}
            found_phones = {x: False for x in speakers}
            for ti in tg.tiers:
                if ' - ' not in ti.name:
                    continue
                if self.speaker_first:
                    speaker, name = ti.name.split(' - ')
                else:
                    name, speaker = ti.name.split(' - ')
                speaker = speaker.strip().replace('/', '_').replace('\\', '_')
                name = name.strip()
                if name.lower().startswith(self.word_label):
                    found_words[speaker] = True
                elif name.lower().startswith(self.phone_label):
                    found_phones[speaker] = True
            found_word = all(found_words.values())
            found_phone = all(found_words.values())
        else:
            for ti in tg.tiers:
                if ti.name.lower().startswith(self.word_label):
                    found_word = True
                elif ti.name.lower().startswith(self.phone_label):
                    found_phone = True
        return multiple_speakers, found_word and found_phone

    def parse_discourse(self, path, types_only=False):
        """
        Parse a forced aligned TextGrid file for later importing.

        Parameters
        ----------
        path : str
            Path to TextGrid file
        types_only : bool
            Flag for whether to only save type information, ignoring the token information

        Returns
        -------
        :class:`~polyglotdb.io.discoursedata.DiscourseData`
            Parsed data from the file
        """

        tg = self.load_textgrid(path)
        multiple_speakers, is_valid = self._is_valid(tg)

        if not is_valid:
            raise (TextGridError(
                'This file ({}) cannot be parsed by the {} parser.'.format(
                    path, self.name)))
        name = os.path.splitext(os.path.split(path)[1])[0]

        # Format 1
        if not multiple_speakers:
            if self.speaker_parser is not None:
                speaker = self.speaker_parser.parse_path(path)
            else:
                speaker = None

            for a in self.annotation_tiers:
                a.reset()
                a.speaker = speaker

            # Parse the tiers
            for i, ti in enumerate(tg.tiers):
                if ti.name.lower().startswith(self.word_label):
                    self.annotation_tiers[0].add(
                        ((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
                elif ti.name.lower().startswith(self.phone_label):
                    self.annotation_tiers[1].add(
                        ((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
            pg_annotations = self._parse_annotations(types_only)

            data = DiscourseData(name, pg_annotations, self.hierarchy)
            for a in self.annotation_tiers:
                a.reset()

        # Format 2
        else:
            dummy = self.annotation_tiers
            self.annotation_tiers = []
            wav_path = find_wav_path(path)
            speaker_channel_mapping = {}
            if wav_path is not None:
                n_channels = get_n_channels(wav_path)
                if n_channels > 1:
                    # Figure speaker-channel mapping
                    n_tiers = 0
                    for ti in tg.tiers:
                        try:
                            speaker, type = ti.name.split(' - ')
                        except ValueError:
                            continue
                        n_tiers += 1
                    ind = 0
                    cutoffs = [x / n_channels for x in range(1, n_channels)]
                    for ti in tg.tiers:
                        try:
                            if self.speaker_first:
                                speaker, type = ti.name.split(' - ')
                            else:
                                type, speaker = ti.name.split(' - ')
                            speaker = speaker.strip().replace('/',
                                                              '_').replace(
                                                                  '\\', '_')
                        except ValueError:
                            continue
                        if speaker in speaker_channel_mapping:
                            continue
                        for i, c in enumerate(cutoffs):
                            if ind / n_channels < c:
                                speaker_channel_mapping[speaker] = i
                                break
                        else:
                            speaker_channel_mapping[speaker] = i + 1
                        ind += 1

            # Parse the tiers
            for ti in tg.tiers:
                try:
                    if self.speaker_first:
                        speaker, type = ti.name.split(' - ')
                    else:
                        type, speaker = ti.name.split(' - ')
                    speaker = speaker.strip().replace('/',
                                                      '_').replace('\\', '_')
                except ValueError:
                    continue
                if type.lower().startswith(self.word_label):
                    type = 'word'
                elif type.lower().startswith(self.phone_label):
                    type = 'phone'
                if len(ti) == 1 and ti[0].mark.strip() == '':
                    continue
                at = OrthographyTier(type, type)
                at.speaker = speaker
                at.add(((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
                self.annotation_tiers.append(at)
            pg_annotations = self._parse_annotations(types_only)
            data = DiscourseData(name, pg_annotations, self.hierarchy)
            data.speaker_channel_mapping = speaker_channel_mapping

            self.annotation_tiers = dummy

        data.wav_path = find_wav_path(path)
        return data
예제 #5
0
def test_directory_parsing(buckeye_test_dir):
    path = os.path.join(buckeye_test_dir, 'test.words')
    parser = DirectorySpeakerParser()
    name = parser.parse_path(path)
    assert(name == 'buckeye')