Example #1
import os
import sys

from pydub import AudioSegment
from pympi.Elan import Eaf

# split_audio_by_start_end() and write_text() are helpers defined elsewhere in the source module.
def read_eaf(ie, tier, silence_tier, silence_marker, json_data, output_text_dir, output_audio_dir):

    input_eaf = Eaf(ie)

    # Check if the tiers we have been given exist
    tier_names = input_eaf.get_tier_names()
    if tier not in tier_names:
        print('missing tier: ' + tier, file=sys.stderr)
        return False
    if silence_tier not in tier_names:
        # the silence tier is optional, so warn but keep going
        print('missing silence tier: ' + silence_tier, file=sys.stderr)

    # get the matching input audio file (same basename, .wav extension)
    inDir, name = os.path.split(ie)
    basename, ext = os.path.splitext(name)
    ia = os.path.join(inDir, basename + ".wav")
    input_audio = AudioSegment.from_wav(ia)

    # We can pass in an arg for a ref tier that has silence labels
    check_silence_ref_tier = False
    if silence_tier in tier_names:
        silence_tier_info = input_eaf.get_parameters_for_tier(silence_tier)
        if silence_tier_info.get("PARENT_REF") == tier:
            check_silence_ref_tier = True

    # Get annotation values, start and end times, and speaker id
    annotations = sorted(input_eaf.get_annotation_data_for_tier(tier))
    params = input_eaf.get_parameters_for_tier(tier)
    if 'PARTICIPANT' in params:
        speaker_id = params['PARTICIPANT']

    i = 0
    for ann in annotations:
        skip = False
        start = ann[0]
        end = ann[1]
        # output new values relative to the clip, not the original start/end times
        clip_start = 0
        clip_end = ann[1] - ann[0]
        annotation = ann[2]

        # Check for annotations labelled with a particular symbol on the main tier
        if annotation == silence_marker:
            skip = True

        # Check for existence of an annotation in ref tier to silence
        # Annotation value doesn't matter
        if check_silence_ref_tier and len(input_eaf.get_ref_annotation_at_time(silence_tier, start)):
            skip = True

        if skip:
            # print('skipping annotation: ' + annotation, start, end)
            print("skipping " + str(i))
        else:
            print("processing " + str(i))
            # print('processing annotation: ' + annotation, start, end)
            # build the output audio/text filename
            fname = basename + "_" + str(i)
            obj = {
                'audioFileName': os.path.join(".", fname + ".wav"),
                'transcript': annotation,
                'startMs': clip_start,
                'stopMs': clip_end
            }
            if 'PARTICIPANT' in params:
                obj["speakerId"] = speaker_id
            json_data.append(obj)
            split_audio_by_start_end(input_audio, start, end, fname, ".wav", output_audio_dir)
            write_text(annotation, fname, ".txt", output_text_dir)
            i += 1
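
Both examples call split_audio_by_start_end() and write_text(), which are defined elsewhere in the source module. A minimal sketch of what they might look like, assuming pydub's millisecond slicing and a plain UTF-8 text write (signatures follow Example #1; the real implementations may differ):

import os


def split_audio_by_start_end(input_audio, start, end, fname, ext, output_audio_dir):
    # assumed sketch: pydub AudioSegments slice by milliseconds, matching the EAF times
    clip = input_audio[start:end]
    clip.export(os.path.join(output_audio_dir, fname + ext), format="wav")


def write_text(annotation, fname, ext, output_text_dir):
    # assumed sketch: one transcript string per clip, written as plain text
    with open(os.path.join(output_text_dir, fname + ext), "w", encoding="utf-8") as f:
        f.write(annotation + "\n")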
Example #2
import os
import sys

from pydub import AudioSegment
from pympi.Elan import Eaf
from slugify import slugify  # python-slugify

# This version reads its settings (verbose, slice_tier, tier_order, text_tier, silence_tier,
# silence_marker, prefix, name_with_annotation) from module-level globals, and calls
# split_audio_by_start_end(), write_text() and write_json() defined elsewhere in the source module.
def read_eaf(ie):

    if verbose:
        print("input file is", ie)

    input_eaf = Eaf(ie)

    # Check if the tiers we have been given exist
    tier_names = list(input_eaf.get_tier_names())
    if verbose:
        print("tier_names", tier_names, file=sys.stderr)

    # Are we working by slice_tier name or order?
    if slice_tier != "default":
        if verbose:
            print("using slice_tier by name:", slice_tier, file=sys.stderr)
    else:

        # Sanity check that the slice_tier num is not greater than the num of tiers
        if tier_order > len(tier_names):
            print("Error: tier number is greater than the number of tiers",
                  file=sys.stderr)
            return False
        if verbose:
            print("using slice_tier by number:",
                  tier_names[tier_order - 1],
                  file=sys.stderr)

    # when slicing by order, the tier index was already validated above
    if slice_tier != "default" and slice_tier not in tier_names:
        print('Error: missing slice_tier ' + slice_tier, file=sys.stderr)
        return False

    if silence_tier not in tier_names:
        if verbose:
            print('silence tier not found: ' + silence_tier, file=sys.stderr)

    # get the input audio file
    inDir, name = os.path.split(ie)
    basename, ext = os.path.splitext(name)

    # output format can vary (e.g. mp3), but the input must still be a .wav file
    ia = os.path.join(inDir, basename + ".wav")
    input_audio = AudioSegment.from_wav(ia)

    # We can pass in an arg for a ref tier that has silence labels
    check_silence_ref_tier = False
    if silence_tier in tier_names:
        silence_tier_info = input_eaf.get_parameters_for_tier(silence_tier)
        if silence_tier_info.get("PARENT_REF") == text_tier:
            check_silence_ref_tier = True

    # Get annotation values, start and end times, and speaker id
    if text_tier not in tier_names:
        print('Error: missing text tier: ' + text_tier, file=sys.stderr)
        return False

    annotations = sorted(input_eaf.get_annotation_data_for_tier(text_tier))

    params = input_eaf.get_parameters_for_tier(text_tier)
    if 'PARTICIPANT' in params:
        speaker_id = params['PARTICIPANT']

    annotations_data = []
    i = 0
    for ann in annotations:
        skip = False
        ref_annotation = []
        start = ann[0]
        end = ann[1]
        # output new values relative to the clip, not the original start/end times
        clip_start = 0
        clip_end = ann[1] - ann[0]
        annotation = ann[2]

        # Check for annotations labelled with a particular symbol on the main tier
        if annotation == silence_marker:
            skip = True

        # Check for existence of an annotation in ref tier to silence
        # Annotation value doesn't matter
        if check_silence_ref_tier:
            ref_annotation = input_eaf.get_ref_annotation_at_time(
                silence_tier, start)
            # any ref annotation at this time means the clip should be skipped
            if len(ref_annotation) > 0:
                skip = True

        if skip:
            print('skipping annotation: ' + annotation, start, end)
        else:
            print('processing annotation: ' + annotation, start, end)
            # build the output audio/text filename
            fname = basename + "_" + str(i)
            if name_with_annotation:
                fname = slugify(annotation)

            if prefix != '':
                fname = prefix + '_' + fname
            obj = {
                'audioFileName': os.path.join(".", fname + ".wav"),
                'transcript': annotation,
                'startMs': clip_start,
                'stopMs': clip_end
            }
            if 'PARTICIPANT' in params:
                obj["speakerId"] = speaker_id
            annotations_data.append(obj)
            split_audio_by_start_end(input_audio, start, end, fname)
            write_text(annotation, fname)
            i += 1
    # output the json data for the next step in kaldi pipeline
    write_json(annotations_data)

    if verbose:
        print(annotations_data)
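
Example #2 reads its settings from module-level globals rather than parameters. A minimal sketch of how those globals might be populated on the command line before calling read_eaf(); the flag names and defaults are illustrative, not taken from the original script:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Split an ELAN .eaf/.wav pair into per-annotation clips")
    parser.add_argument("-i", "--input", required=True,
                        help="path to the .eaf file (a matching .wav is expected next to it)")
    parser.add_argument("--slice-tier", default="default",
                        help="tier to slice by name, or 'default' to select by --tier-order")
    parser.add_argument("--tier-order", type=int, default=1,
                        help="1-based tier index, used when --slice-tier is 'default'")
    parser.add_argument("--text-tier", default="default",
                        help="tier holding the transcription annotations")
    parser.add_argument("--silence-tier", default="silence",
                        help="reference tier marking regions to skip")
    parser.add_argument("--silence-marker", default="silence",
                        help="annotation value that marks silence on the main tier")
    parser.add_argument("--prefix", default="", help="optional filename prefix")
    parser.add_argument("--name-with-annotation", action="store_true",
                        help="name clips with a slug of the annotation text")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    # read_eaf() above reads these names as module-level globals
    verbose = args.verbose
    slice_tier = args.slice_tier
    tier_order = args.tier_order
    text_tier = args.text_tier
    silence_tier = args.silence_tier
    silence_marker = args.silence_marker
    prefix = args.prefix
    name_with_annotation = args.name_with_annotation

    read_eaf(args.input)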