Example #1
0
    def to_eaf(self, skipempty=True, pointlength=0.1):
        """Convert the object to an pympi.Elan.Eaf object

        :param int pointlength: Length of respective interval from points in
                                seconds
        :param bool skipempty: Skip the empty annotations
        :returns: :class:`pympi.Elan.Eaf` object
        :raises ImportError: If the Eaf module can't be loaded.
        :raises ValueError: If the pointlength is not strictly positive.
        """
        from pympi.Elan import Eaf
        eaf_out = Eaf()
        if pointlength <= 0:
            raise ValueError('Pointlength should be strictly positive')
        for tier in self.get_tiers():
            eaf_out.add_tier(tier.name)
            for ann in tier.get_intervals(True):
                if tier.tier_type == 'TextTier':
                    ann = (ann[0], ann[0]+pointlength, ann[1])
                if ann[2].strip() or not skipempty:
                    eaf_out.add_annotation(tier.name, int(round(ann[0]*1000)),
                                           int(round(ann[1]*1000)), ann[2])
        return eaf_out
Example #2
0
    def to_eaf(self, skipempty=True, pointlength=0.1):
        """Convert the object to an pympi.Elan.Eaf object

        :param int pointlength: Length of respective interval from points in
                                seconds
        :param bool skipempty: Skip the empty annotations
        :returns: :class:`pympi.Elan.Eaf` object
        :raises ImportError: If the Eaf module can't be loaded.
        :raises ValueError: If the pointlength is not strictly positive.
        """
        from pympi.Elan import Eaf
        eaf_out = Eaf()
        if pointlength <= 0:
            raise ValueError('Pointlength should be strictly positive')
        for tier in self.get_tiers():
            eaf_out.add_tier(tier.name)
            for ann in tier.get_intervals(True):
                if tier.tier_type == 'TextTier':
                    ann = (ann[0], ann[0] + pointlength, ann[1])
                if ann[2].strip() or not skipempty:
                    eaf_out.add_annotation(tier.name,
                                           int(round(ann[0] * 1000)),
                                           int(round(ann[1] * 1000)), ann[2])
        return eaf_out
Example #3
0
    seg2tier = {}
    with open(seg2tier_path) as f:
        for l in f:
            tok = l.strip().split()
            seg2tier[tok[0]] = tok[1]

    tiers = read_ctm(words_ctm_path, seg2tier, eaf)

    for tier in tiers.keys():
        part = eaf.tiers[tier][2]['PARTICIPANT']
        t = eaf.add_tier('{}_words'.format(tier), parent='tier', part=part, ann='Clarin-PL-service')

    for tier, segs in tiers.items():
        for seg in segs:
            eaf.add_annotation('{}_words'.format(tier), seg[0], seg[1], seg[2])

    if args.phones_ctm:
        phones_ctm_path = args.phones_ctm
        tiers = read_ctm(phones_ctm_path, seg2tier, eaf)

        for tier in tiers.keys():
            part = eaf.tiers[tier][2]['PARTICIPANT']
            t = eaf.add_tier('{}_phones'.format(tier), parent='tier', part=part, ann='Clarin-PL-service')

        for tier, segs in tiers.items():
            for seg in segs:
                eaf.add_annotation('{}_phones'.format(tier), seg[0], seg[1], seg[2])

    to_eaf(str(eaf_out_path), eaf)
Example #4
0
def make_elans(input_dir: str, output_dir: str, copy_wavs: bool):
    """
    Make ELAN files based on filenames of WAV files and annotation from matching text file
    :param input_dir: Directory name of folder containing TXT and WAV audio files
    :param  output_dir: Directory name to save EAF files into
    :param copy_wavs: Setting whether or not to copy the WAV file to the output dir
    """
    # Process each file
    files = glob.glob(f'{input_dir}/**/*.txt', recursive=True)
    print(files)

    for filename in files:

        filepath, ext = os.path.splitext(filename)
        basename = os.path.splitext(os.path.basename(filepath))[0]
        subdirname = os.path.basename(os.path.dirname(filepath))

        sex = subdirname[0]
        participant = subdirname[1:]

        # SEX :== m | f
        # SPEAKER_ID :== <INITIALS><DIGIT>
        # INITIALS :== speaker initials, 3 letters
        # DIGIT :== number 0-9 to differentiate speakers with identical initials

        # print(filename)     # input/dr1/fmem0/sa2.txt
        # print(filepath)     # input/dr1/fmem0/sa2
        # print(subdirname)   # fmem0
        # print(basename)     # sa2
        # print(ext)          # txt

        # Get audio file duration - use this as the EAF annotation's end timeslot
        # duration = int(librosa.get_duration(filename=os.path.join(input_dir, filename))*1000)

        # Get annotation from the text file matching on file basename
        with open(filename, 'r', encoding='utf-8') as text_file:
            annotation = text_file.read()
        annotation_split = annotation.split()
        start = int(annotation_split[0])
        duration = int(annotation_split[1])
        # convert audio samples to seconds to ms
        duration = int(duration / 16000 * 1000)
        annotation_text = " ".join(annotation_split[2:])

        # Add any annotation cleaning here
        # annotation = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), annotation)

        print(start, duration, annotation_text)

        # Make EAF file
        output_eaf = Eaf()
        output_eaf.add_tier('default', part=participant)
        output_eaf.add_annotation('default', start, duration, annotation_text)
        output_eaf.add_linked_file(
            os.path.join(output_dir, f'{subdirname}-{basename}.wav'))
        output_eaf.to_file(
            os.path.join(output_dir, f'{subdirname}-{basename}.eaf'))

        # Copy WAV?
        # if copy_wavs:
        shutil.copyfile(
            f'{filepath}.wav',
            os.path.join(output_dir, f'{subdirname}-{basename}.wav'))

    print('>>> Done')