Python Eaf.add_linked_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pympi.Elan

클래스/타입: Eaf

메소드/함수: add_linked_file

hotexamples.com에서의 예제들: 4

Python Eaf.add_linked_file - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 pympi.Elan.Eaf.add_linked_file의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Eaf(28)

get_tier_names(11)

to_file(10)

get_annotation_data_for_tier(8)

get_parameters_for_tier(7)

add_tier(6)

get_tier_ids_for_linguistic_type(6)

get_linguistic_type_names(5)

add_linked_file(4)

insert_annotation(4)

add_linguistic_type(3)

add_annotation(3)

get_ref_annotation_at_time(2)

insertAnnotation(1)

remove_tier(1)

add_external_ref(1)

add_controlled_vocabulary(1)

add_lexicon_ref(1)

addTier(1)

generate_annotation_id(1)

tofile(1)

예제 #1

파일 보기

파일: make-elan.py 프로젝트: CoEDL/elan-helpers

def make_elan(source_parent_dir, target_parent_dir):
    """
    Make one ELAN (.eaf) file per WAV file found under ``source_parent_dir``.

    Written for the TIDIGITS corpus, so the transcription is derived from the
    file name: the last character of the base name is dropped, every remaining
    character becomes one token ("o" -> "oh", "z" -> "zero") and digits are
    spelled out as words.

    The directory layout below ``source_parent_dir`` is mirrored below
    ``target_parent_dir``; missing target directories are created.

    :param source_parent_dir: Root directory that is walked for WAV files
    :param target_parent_dir: Root directory the EAF files are written into
    """

    for dirname, dirnames, filenames in os.walk(source_parent_dir):

        # Print the path of every subdirectory first.
        for subdirname in dirnames:
            print(os.path.join(dirname, subdirname))

        # Work with the path relative to the walk root instead of splitting
        # ``dirname`` itself, so the root may be any path (absolute, nested)
        # and the files may sit at any depth below it.
        rel_dir = os.path.relpath(dirname, source_parent_dir)
        rel_parts = [] if rel_dir == os.curdir else rel_dir.split(os.path.sep)
        target_path = os.path.join(target_parent_dir, *rel_parts)

        for filename in filenames:
            # Match on the real extension; a substring test would also pick
            # up names such as "clip.wav.txt".
            if not filename.endswith('.wav'):
                continue

            basename = os.path.splitext(filename)[0]
            print(source_parent_dir, *rel_parts, filename)

            if not os.path.exists(target_path):
                print(target_path)
                os.makedirs(target_path)

            # Audio file duration in milliseconds - used as the end timeslot.
            # NOTE(review): newer librosa releases name this keyword ``path``
            # instead of ``filename`` - confirm against the pinned version.
            duration = int(
                librosa.get_duration(filename=os.path.join(dirname, filename)) * 1000)

            # Make the annotation from the file name (minus the trailing
            # suffix character), one token per character.
            annotation = " ".join(basename[:-1])
            # These are specific to the TIDIGITS naming convention
            annotation = annotation.replace("o", "oh")
            annotation = annotation.replace("z", "zero")

            # Spell out every run of digits as words.
            text = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), annotation)

            print(filename, duration, annotation, text)

            # Make elan: a single annotation on tier 'tx' spanning the whole
            # recording.
            output_eaf = Eaf()
            output_eaf.add_tier('tx')
            output_eaf.insert_annotation('tx', 0, duration, text)
            # The link points at a WAV inside the target directory; this
            # function itself does not copy the audio there.
            output_eaf.add_linked_file(os.path.join(target_path, f'{basename}.wav'))

            output_eaf.to_file(os.path.join(target_path, f'{basename}.eaf'))

예제 #2

파일 보기

파일: convert.py 프로젝트: CoEDL/elan-helpers

def make_elans(input_dir: str, output_dir: str, copy_wavs: bool):
    """
    Make ELAN files based on filenames of WAV files and annotation from matching text file.

    Only files located directly in ``input_dir`` are processed: every path
    used below (audio, annotation lookup, output) is built from ``input_dir``
    and ``output_dir`` alone, so files in subdirectories could not be resolved.

    :param input_dir: Directory name of folder containing TXT and WAV audio files
    :param output_dir: Directory name to save EAF files into
    :param copy_wavs: Setting whether or not to copy the WAV file to the output dir
    """
    # Take just the first (top-level) entry of the walk; a missing directory
    # yields nothing, exactly as iterating os.walk() would.
    _, _, filenames = next(os.walk(input_dir), (None, None, []))

    for filename in filenames:
        # Match on the real extension; a substring test would also pick up
        # names such as "clip.wav.txt".
        if not filename.endswith('.wav'):
            continue

        basename = os.path.splitext(filename)[0]
        print(basename)

        wav_path = os.path.join(input_dir, filename)

        # Audio duration in milliseconds - used as the EAF annotation's end timeslot
        duration = int(librosa.get_duration(filename=wav_path) * 1000)

        # Get annotation from the text file matching on file basename
        annotation = get_annotation(input_dir, basename)

        # Add any annotation cleaning here

        print(duration, annotation)

        # Make EAF file with one annotation covering the whole recording.
        # NOTE(review): no tier is added explicitly, so this relies on Eaf()
        # already providing a tier named 'default' - confirm against pympi.
        output_eaf = Eaf()
        output_eaf.insert_annotation('default', 0, duration, annotation)
        output_eaf.add_linked_file(
            os.path.join(output_dir, f'{basename}.wav'))
        output_eaf.to_file(os.path.join(output_dir, f'{basename}.eaf'))

        # Copy WAV?
        if copy_wavs:
            shutil.copyfile(wav_path, os.path.join(output_dir, filename))
    print('>>> Done')

예제 #3

파일 보기

파일: make-elan.py 프로젝트: CoEDL/elan-helpers

def make_elans(spreadsheet: str, source: str, target: str):
    """
    Make ELAN files based on filenames of WAV files.

    Only files located directly in ``source`` are processed: the audio path
    is built from ``source`` and the file name alone, so files in
    subdirectories could not be resolved.

    :param spreadsheet: Path and file name of the spreadsheet containing WAV filenames and matching annotations
    :param source: Directory name of folder containing WAV audio files
    :param target: Directory name to save EAF files into
    """

    # Read spreadsheet data
    print('Loading data from spreadsheet')
    annotations = get_annotations(spreadsheet)

    # Process each file
    print('Processing WAVs')
    # Take just the first (top-level) entry of the walk; a missing directory
    # yields nothing, exactly as iterating os.walk() would.
    _, _, filenames = next(os.walk(source), (None, None, []))

    for filename in filenames:
        # Match on the real extension; a substring test would also pick up
        # names such as "clip.wav.txt".
        if not filename.endswith('.wav'):
            continue

        basename = os.path.splitext(filename)[0]

        # Audio duration in milliseconds - used as the EAF annotation's end timeslot
        duration = int(
            librosa.get_duration(filename=os.path.join(source, filename)) * 1000)

        # Get annotation from the source data matching on filename
        annotation = get_annotation(annotations, filename)

        # Add any annotation cleaning here

        print(filename, duration, annotation)

        # Make EAF file: a single annotation on tier 'tx' spanning the whole
        # recording, linked to a WAV of the same base name in ``target``.
        output_eaf = Eaf()
        output_eaf.add_tier('tx')
        output_eaf.insert_annotation('tx', 0, duration, annotation)
        output_eaf.add_linked_file(
            os.path.join(target, f'{basename}.wav'))
        output_eaf.to_file(os.path.join(target, f'{basename}.eaf'))
    print('>>> Done')

예제 #4

파일 보기

파일: convert.py 프로젝트: CoEDL/elan-helpers

def make_elans(input_dir: str, output_dir: str, copy_wavs: bool,
               sample_rate: int = 16000):
    """
    Make ELAN files from transcript text files and their matching WAV files.

    Every ``*.txt`` file found recursively below ``input_dir`` is expected to
    hold ``<start sample> <end sample> <text ...>``. The sample offsets are
    converted to milliseconds and written as one annotation on the 'default'
    tier. Output files are named ``<subdirname>-<basename>`` and written flat
    into ``output_dir``. Transcripts that do not start with two integers are
    reported and skipped.

    :param input_dir: Directory name of folder containing TXT and WAV audio files
    :param output_dir: Directory name to save EAF files into
    :param copy_wavs: Currently not consulted - the WAV next to each transcript
        is always copied to the path registered as the EAF's linked file
    :param sample_rate: Audio sample rate in Hz used to convert sample offsets
        to milliseconds
    """
    # Process each file
    files = glob.glob(f'{input_dir}/**/*.txt', recursive=True)
    print(files)

    for filename in files:

        # Example: filename input/dr1/fmem0/sa2.txt
        #          filepath input/dr1/fmem0/sa2
        #          subdirname fmem0, basename sa2
        filepath = os.path.splitext(filename)[0]
        basename = os.path.splitext(os.path.basename(filepath))[0]
        subdirname = os.path.basename(os.path.dirname(filepath))

        # Directory name is <SEX><SPEAKER_ID>:
        # SEX :== m | f
        # SPEAKER_ID :== <INITIALS><DIGIT>
        # INITIALS :== speaker initials, 3 letters
        # DIGIT :== number 0-9 to differentiate speakers with identical initials
        participant = subdirname[1:]

        # Get annotation from the text file
        with open(filename, 'r', encoding='utf-8') as text_file:
            annotation = text_file.read()
        fields = annotation.split()
        try:
            start_sample = int(fields[0])
            end_sample = int(fields[1])
        except (IndexError, ValueError):
            # Empty or malformed transcript: report it and carry on with the
            # remaining files instead of aborting the whole run.
            print(f'Skipping {filename}: expected "<start> <end> <text>"')
            continue

        # Convert audio samples to seconds to ms. Both ends are converted so
        # the annotation boundaries share one unit.
        start = int(start_sample / sample_rate * 1000)
        duration = int(end_sample / sample_rate * 1000)
        annotation_text = " ".join(fields[2:])

        # Add any annotation cleaning here

        print(start, duration, annotation_text)

        # Make EAF file
        linked_wav = os.path.join(output_dir, f'{subdirname}-{basename}.wav')
        output_eaf = Eaf()
        output_eaf.add_tier('default', part=participant)
        output_eaf.add_annotation('default', start, duration, annotation_text)
        output_eaf.add_linked_file(linked_wav)
        output_eaf.to_file(
            os.path.join(output_dir, f'{subdirname}-{basename}.eaf'))

        # The copy is unconditional (copy_wavs is not checked): the
        # destination is the same path that was registered as the linked file.
        shutil.copyfile(f'{filepath}.wav', linked_wav)

    print('>>> Done')