def run_gentle(key='103/1241/103_1241_000000_000001'):
    """Force-align one utterance with gentle, caching the result as JSON.

    Args:
        key: relative path (without extension) under TRAIN_PATH identifying
            the .normalized.txt / .wav / .json file triple.

    Returns:
        dict: gentle alignment result (served from the .json cache when it
        already exists).
    """
    text_file = f'{TRAIN_PATH}/{key}.normalized.txt'
    audio_file = f'{TRAIN_PATH}/{key}.wav'
    json_file = f'{TRAIN_PATH}/{key}.json'
    # Serve the cached alignment if a previous run already produced one.
    if os.path.isfile(json_file):
        with open(json_file) as r:
            return json.load(r)  # idiomatic: json.load instead of loads(read())
    with open(text_file, encoding="utf-8") as fh:
        transcript = fh.read()
    logging.info("converting audio to 8K sampled wav")
    # (removed a dead local `on_progress` callback that was defined but
    # never passed to transcribe -- progress_cb below is explicitly None)
    with gentle.resampled(audio_file) as wavfile:
        aligner = gentle.ForcedAligner(
            resources,
            transcript,
            nthreads=multiprocessing.cpu_count(),
            disfluency=False,  # do not include disfluencies (uh, um) in alignment
            conservative=False,
            disfluencies={'uh', 'um'})
        result = aligner.transcribe(wavfile, progress_cb=None, logging=logging)
    result_dict = result.to_dict()
    # Cache the alignment so subsequent calls return immediately.
    with open(json_file, 'w') as f:
        json.dump(result_dict, f, indent=2)
    return result_dict
def align_file(transcription, snd_filename):
    """Force-align *transcription* against *snd_filename* with a 10 s limit.

    Args:
        transcription: transcript text to align.
        snd_filename: path to the audio file.

    Returns:
        dict: {snd_filename: gentle result, or None if the alignment timed out}
    """
    # TODO: Add a file of "sound files that did not finish"
    class TimeoutException(Exception):
        pass

    @contextmanager
    def time_limit(seconds):
        # SIGALRM-based timeout: Unix-only and main-thread-only.
        def signal_handler(signum, frame):
            raise TimeoutException("Timed out!")

        signal.signal(signal.SIGALRM, signal_handler)
        signal.alarm(seconds)
        try:
            yield
        finally:
            # Always cancel the pending alarm, even on timeout.
            signal.alarm(0)

    with gentle.resampled(snd_filename) as wavfile:
        aligner = gentle.ForcedAligner(resources, transcription,
                                       nthreads=nthreads)
        try:
            with time_limit(10):
                result = {snd_filename: aligner.transcribe(wavfile)}
        except TimeoutException:
            # BUG FIX: the original formatted this message with an undefined
            # name `fid`, raising NameError whenever the timeout actually
            # fired. Use the known filename instead.
            print(
                "Transcription of {} timed out! Please check that your transcription is accurate."
                .format(snd_filename))
            result = {snd_filename: None}
    return result
def get_gentle_response(self, parsed_txt_path):
    """Align the parsed transcript against this object's audio via gentle.

    Args:
        parsed_txt_path (str): path to the parsed transcript text file.

    Returns:
        list: aligned word dicts (the "duration" field is omitted).
    """
    with open(parsed_txt_path, encoding="utf-8") as transcript_file:
        text = transcript_file.read()
    gentle_resources = gentle.Resources()
    # Filler words gentle should tolerate when aligning.
    ignorable = {'uh', 'um'}
    with gentle.resampled(self.audiopath) as resampled_wav:
        forced_aligner = gentle.ForcedAligner(
            gentle_resources,
            text,
            nthreads=multiprocessing.cpu_count(),
            disfluency=False,
            conservative=False,
            disfluencies=ignorable)
        alignment = forced_aligner.transcribe(resampled_wav)
    words = []
    for aligned_word in alignment.words:
        words.append(aligned_word.as_dict(without="duration"))
    return words
def run_gentle(seg, transcript):
    """
    Takes in a segment
       1. create new text file containing text
       2. create new audio with pydub
       3. run Gentle with these two
       4. delete text file/audio files

    Parameters
    ---------
    seg : Segment object to align with Gentle
    transcript : string holding the relevant transcript for this segment
    """
    # pydub slices in milliseconds; start_audio/end_audio are seconds.
    audio_cut = seg.audio_file[1000 * seg.start_audio:1000 * seg.end_audio]
    audio_cut.export("temp_audio.wav", format="wav")
    try:
        # run Gentle
        resources = gentle.Resources()
        with gentle.resampled("temp_audio.wav") as wavfile:
            aligner = gentle.ForcedAligner(resources, transcript)
            result = aligner.transcribe(wavfile).words
    finally:
        # BUG FIX: delete the cut audio file even when alignment raises,
        # so a failure no longer leaks temp_audio.wav on disk.
        os.remove("temp_audio.wav")
    # fix unaligned-word start/end time data
    fix_unaligned(result, len(audio_cut) / 1000)
    # put gentle timestamps in relation to entire file
    for word in result:
        word.start += seg.start_audio
        word.end += seg.start_audio
    return result
def run_gentle(audio_path: str, text_content: str, tokenization_view: View = None):
    """Force-align *text_content* to the audio at *audio_path* with gentle.

    Args:
        audio_path: path to the source audio file.
        text_content: full transcript text to align.
        tokenization_view: optional view whose TOKEN annotations replace
            gentle's own tokenization (offsets index into text_content).

    Returns:
        gentle transcription result object.
    """
    with gentle.resampled(audio_path) as audio_file:
        resources = gentle.Resources()
        aligner = gentle.ForcedAligner(
            resources,
            text_content,
            nthreads=multiprocessing.cpu_count(),
            disfluencies={'uh', 'um'},
            disfluency=True,
            conservative=False)
        if tokenization_view is not None:
            # NOTE(review): this reaches into gentle's private MetaSentence
            # state (aligner.ms._seq) to substitute the external token
            # sequence -- fragile across gentle versions; confirm on upgrade.
            aligner.ms._seq = []
            for token in tokenization_view.get_annotations(Uri.TOKEN):
                print(token.serialize(pretty=True))
                start = token.properties['start']
                end = token.properties['end']
                # Token text is sliced out of the original transcript by
                # the annotation's character offsets.
                token_text = text_content[start:end]
                kaldi_token = {
                    'start': start,
                    'end': end,
                    # Normalize to the form gentle's kaldi vocab expects.
                    'token': metasentence.kaldi_normalize(token_text,
                                                          aligner.ms.vocab)
                }
                aligner.ms._seq.append(kaldi_token)
        result = aligner.transcribe(audio_file)
    return result
def align(audiopath, text, nthreads=1):
    """Run gentle forced alignment of *text* against *audiopath*.

    Args:
        audiopath: path to the input audio file.
        text: transcript string.
        nthreads: number of worker threads for the aligner.

    Returns:
        gentle transcription result.
    """
    resources = gentle.Resources()
    logging.info("converting audio to 8K sampled wav")
    with gentle.resampled(audiopath) as wavfile:
        logging.info("Starting alignment")
        forced = gentle.ForcedAligner(resources,
                                      text,
                                      nthreads=nthreads,
                                      disfluency=False,
                                      conservative=False)
        outcome = forced.transcribe(wavfile,
                                    progress_cb=on_progress,
                                    logging=logging)
    return outcome
def test_transcriber(self):
    """Smoke-test MultiThreadedTranscriber on a known audio snippet."""
    from gentle import resampled, kaldi_queue, standard_kaldi, Resources
    from gentle.transcriber import MultiThreadedTranscriber

    res = Resources()
    queue = kaldi_queue.build(res, 1)
    transcriber = MultiThreadedTranscriber(queue)
    # Resample a 2.5 s window starting 10.5 s into the clip.
    with resampled('examples/data/lucier.mp3', 10.5, 2.5) as filename:
        words, duration = transcriber.transcribe(filename)
    self.assertEqual(words[0].word, "different")
def align(audiopath, transcript):
    """Align *transcript* to the audio file and return the result as a dict."""
    logging.info("converting audio to 8K sampled wav")
    with gentle.resampled(audiopath) as wavfile:
        logging.info("starting alignment")
        forced_aligner = gentle.ForcedAligner(resources,
                                              transcript,
                                              nthreads=nthreads,
                                              disfluency=False,
                                              conservative=False)
        transcription = forced_aligner.transcribe(wavfile,
                                                  progress_cb=on_progress,
                                                  logging=logging)
    # Round-trip through JSON text to hand back a plain dict.
    return json.loads(transcription.to_json())
def test_transcriber(self):
    """Transcribe a known clip and check the first recognized word."""
    import subprocess
    from gentle import resampled, kaldi_queue, standard_kaldi, Resources
    from gentle.transcriber import MultiThreadedTranscriber

    # Route kaldi's stderr into stdout so test output stays in one stream.
    standard_kaldi.STDERR = subprocess.STDOUT
    res = Resources()
    queue = kaldi_queue.build(res, 1)
    transcriber = MultiThreadedTranscriber(queue)
    with resampled(self.audio, 10.5, 2.5) as filename:
        words, duration = transcriber.transcribe(filename)
    self.assertEqual(words[0].word, "different")
def align_audio(wav_path, transcript):
    """Force-align *transcript* to *wav_path*; return the result as a dict."""
    with gentle.resampled(wav_path) as wavfile:
        print("starting alignment {}".format(wav_path))
        forced = gentle.ForcedAligner(RESOURCES,
                                      transcript,
                                      nthreads=N_THREADS,
                                      disfluency=False,
                                      conservative=False,
                                      disfluencies=DISFLUENCIES)
        outcome = forced.transcribe(wavfile,
                                    progress_cb=_on_progress,
                                    logging=logging)
    return json.loads(outcome.to_json())
def run_gentle(video_path, vid, result_path):
    """Align a video's subtitle text with its audio and save the JSON result.

    Args:
        video_path: path to the source video/audio file.
        vid: video id used by read_subtitle() to locate the VTT subtitles.
        result_path: destination path for the JSON alignment output.
    """
    vtt_subtitle = read_subtitle(vid)
    # IDIOM FIX: the original indexed vtt_subtitle[i] inside an enumerate
    # loop and built the string with O(n^2) `+=`; join the cue texts
    # directly (each followed by a space, exactly as before).
    transcript = ''.join(sub.text + ' ' for sub in vtt_subtitle)
    transcript = re.sub('\n', ' ', transcript)  # remove newline characters
    # align
    with gentle.resampled(video_path) as wav_file:
        aligner = gentle.ForcedAligner(resources,
                                       transcript,
                                       nthreads=nthreads,
                                       disfluency=False,
                                       conservative=False,
                                       disfluencies=disfluencies)
        result = aligner.transcribe(wav_file, logging=logging)
    # write results
    with open(result_path, 'w', encoding="utf-8") as fh:
        fh.write(result.to_json(indent=2))
def align(args):
    """Align one (audiopath, transcript) pair; return result dict with path.

    Args:
        args: 2-tuple of (audiopath, transcript) -- tuple form keeps this
            usable with multiprocessing map-style APIs.
    """
    audiopath, transcript = args
    with gentle.resampled(audiopath) as wavfile:
        logging.info("Audio file: {}".format(audiopath))
        logging.info("Transcript: <{}...>".format(transcript[:40]))
        forced = gentle.ForcedAligner(resources,
                                      transcript,
                                      nthreads=1,
                                      disfluency=False,
                                      conservative=False)
        transcription = forced.transcribe(wavfile,
                                          progress_cb=on_progress,
                                          logging=logging)
        outcome = json.loads(transcription.to_json())
    # Tag the result with its source path so callers can pair them up.
    outcome['audiopath'] = audiopath
    return outcome
def test_aligner(self):
    """Align a fixed transcript window and verify the first two words."""
    import subprocess
    from gentle import resampled, standard_kaldi, Resources
    from gentle.forced_aligner import ForcedAligner
    from gentle.transcription import Word

    standard_kaldi.STDERR = subprocess.STDOUT
    res = Resources()
    # (local renamed from `align`, which shadowed a module-level name)
    aligner = ForcedAligner(res, self.transcript, nthreads=1)
    with resampled(self.audio, 5.0, 5.0) as filename:
        transcription = aligner.transcribe(filename)
    words = transcription.words
    self.assertEqual(words[0].word, "i")
    self.assertEqual(words[1].word, "am")
    self.assertEqual(words[1].case, Word.SUCCESS)
def align_db(data):
    """Align every row of *data* with gentle, retrying failed rows until done.

    Each row must provide `sentence_path` (audio file) and `transcription`.
    Alignments are written to alignments/<last 4 path components>.json.
    """
    import pathlib
    pending = list(range(len(data)))
    while pending:
        # BUG FIX: the original appended failures to the very list it was
        # iterating AND never removed successes, so its `while True` loop
        # could never reach len(except_i_list) == 0 and never terminated.
        # Collect failures into a fresh list each pass and retry only those.
        failed = []
        for i in tqdm(pending):
            row = data.iloc[i]
            f = row.sentence_path
            transcript = row.transcription
            with gentle.resampled(f) as wavfile:
                aligner = gentle.ForcedAligner(resources, transcript,
                                               nthreads=40)
                try:
                    result = aligner.transcribe(wavfile,
                                                progress_cb=on_progress,
                                                logging=logging)
                except Exception:
                    # Narrowed from a bare `except` so Ctrl-C still works;
                    # failed rows are retried on the next pass.
                    failed.append(i)
                    continue
            # Mirror the last four path components under alignments/.
            output = os.path.join(
                'alignments',
                '/'.join(f.split('/')[-4:]).split('.')[0] + '.json')
            pathlib.Path('/'.join(output.split('/')[0:-1])).mkdir(
                parents=True, exist_ok=True)
            # Context manager replaces the original's unclosed-on-error
            # open()/close() pair.
            with open(output, 'w') as fh:
                fh.write(result.to_json(indent=2))
            logging.info("output written to %s" % (output))
        print("except_i_list:", failed)
        pending = failed
def get_transcribed_words(textFile, audioFile):
    """Align *audioFile* to the transcript in *textFile*.

    Returns:
        list[Word]: aligned words paired with their root phones (the part of
        each gentle phone label before the '_' position suffix).
    """
    with open(textFile) as file:
        transcript = file.read()
    resources = gentle.Resources()
    with gentle.resampled(audioFile) as wavfile:
        aligner = gentle.ForcedAligner(resources, transcript)
        result = aligner.transcribe(wavfile)
    transcribed_words = []
    for word in result.words:
        phones = word.phones
        if phones is not None:
            # gentle phones look like "ah_I"; keep only the part before '_'.
            root_phones = [
                p['phone'][0:p['phone'].index('_')] for p in phones
            ]
            transcribed_words.append(Word(word.word, root_phones))
    return transcribed_words
def align_onefile(data, align_json_path):
    """Align the single utterance identified by *align_json_path*.

    The path encodes .../<speaker>/<emotion>/<id>.json; the matching row of
    *data* (columns: id, speaker, sentence_path, transcription) supplies the
    audio path and transcript. The resulting JSON mirrors the audio path
    under alignments/. Returns None; silently skips on alignment failure.
    """
    import pathlib
    splitted_path = split_path(align_json_path)
    json_file_name = splitted_path[-1]
    # Renamed from `id`, which shadowed the builtin.
    utt_id, _ = os.path.splitext(json_file_name)
    emotion = splitted_path[-2]
    speaker = splitted_path[-3]
    # iloc: df to series
    row = data[(data.id == utt_id) & (data.speaker == speaker)].iloc[0]
    f = row.sentence_path
    transcript = row.transcription
    with gentle.resampled(f) as wavfile:
        aligner = gentle.ForcedAligner(resources, transcript, nthreads=40)
        try:
            result = aligner.transcribe(wavfile,
                                        progress_cb=on_progress,
                                        logging=logging)
        except Exception:
            # Best-effort skip, but log instead of failing silently
            # (original had a bare `except: return`).
            logging.exception("alignment failed for %s", f)
            return
    output = os.path.join(
        'alignments', '/'.join(f.split('/')[-4:]).split('.')[0] + '.json')
    pathlib.Path('/'.join(output.split('/')[0:-1])).mkdir(parents=True,
                                                          exist_ok=True)
    # Context manager guarantees the handle is closed even on write errors.
    with open(output, 'w') as fh:
        fh.write(result.to_json(indent=2))
    logging.info("output written to %s" % (output))
def gentle_solve(self, audio_path, transcript):
    """
    gentle wrapper to solve the forced alignment given audio file and text string
    """
    opts = {
        'log': 'INFO',
        'nthreads': self.num_thread,
        'conservative': True,
        'disfluency': True,
    }
    # Filler words gentle is allowed to insert during alignment.
    fillers = {'uh', 'um'}
    gentle_resources = gentle.Resources()
    with gentle.resampled(audio_path) as wavfile:
        forced = gentle.ForcedAligner(gentle_resources,
                                      transcript,
                                      nthreads=opts['nthreads'],
                                      disfluency=opts['disfluency'],
                                      conservative=opts['conservative'],
                                      disfluencies=fillers)
        alignment = forced.transcribe(wavfile)
    # Drop per-phone detail; callers only need word-level timing dicts.
    return [w.as_dict(without="phones") for w in alignment.words]
def align_db(data):
    """Align each row of *data* (sentence_path, transcription) with gentle,
    writing one JSON file per utterance under alignments/."""
    import pathlib
    for i, row in data.iterrows():
        f = row.sentence_path
        transcript = row.transcription
        with gentle.resampled(f) as wavfile:
            aligner = gentle.ForcedAligner(resources, transcript)
            result = aligner.transcribe(wavfile,
                                        progress_cb=on_progress,
                                        logging=logging)
        # Mirror the last four components of the audio path under alignments/.
        output = os.path.join(
            'alignments',
            '/'.join(f.split('/')[-4:]).split('.')[0] + '.json')
        pathlib.Path('/'.join(output.split('/')[0:-1])).mkdir(parents=True,
                                                              exist_ok=True)
        # FIX: context manager closes the file even if the write raises
        # (original used a bare open()/close() pair).
        with open(output, 'w') as fh:
            fh.write(result.to_json(indent=2))
        # `output` from os.path.join is always non-empty, so the original
        # `if output:` guard was dead; log unconditionally.
        logging.info("output written to %s" % (output))
def start_aligning(audiofile, txtfile, output):
    """Align *txtfile* against *audiofile* and write gentle's JSON to *output*.

    Args:
        audiofile: path to the source audio.
        txtfile: path to the UTF-8 transcript.
        output: destination path for the JSON alignment.
    """
    log_level = "INFO"  # can be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL
    logging.getLogger().setLevel(log_level)
    # NOTE: currently unused -- the disfluency arguments in the aligner call
    # below are commented out.
    disfluencies = set(['uh', 'um'])
    with open(txtfile, encoding="utf-8") as fh:
        transcript = fh.read()
    resources = gentle.Resources()
    logging.info("converting audio to 8K sampled wav")
    with gentle.resampled(audiofile) as wavfile:
        logging.info("starting alignment")
        aligner = gentle.ForcedAligner(
            resources, transcript, nthreads
        )  #, True, False, disfluencies)#, conservative, disfluencies)
        result = aligner.transcribe(wavfile,
                                    progress_cb=on_progress,
                                    logging=logging)
    # BUG FIX: the original opened the output file with a plain open() and
    # never closed it; the context manager flushes and closes deterministically.
    with open(output, 'w', encoding="utf-8") as out_fh:
        out_fh.write(result.to_json(indent=2))
    logging.info("output written to %s" % (output))
# word in the audio. words.sort(key=lambda word: word.start) words.append(transcription.Word(word="__dummy__")) words = [words[i] for i in range(len(words)-1) if not words[i].corresponds(words[i+1])] return words, duration if __name__=='__main__': # full transcription import json import sys import logging logging.getLogger().setLevel('INFO') import gentle from gentle import standard_kaldi from gentle import kaldi_queue resources = gentle.Resources() k_queue = kaldi_queue.build(resources, 3) trans = MultiThreadedTranscriber(k_queue) with gentle.resampled(sys.argv[1]) as filename: words, duration = trans.transcribe(filename) open(sys.argv[2], 'w').write(transcription.Transcription(words=words).to_json())
def data_generator(file_id, min_dur=2, max_dur=(5, 20), randomize=False):
    """Given a file id and random seed, align the audio and text versions
    after dividing into single-speaker utterances, and write out texts of
    unbroken captured strings and their corresponding audio segments when
    the latter are between 2 and max_length seconds.
    """
    # Target segment length: either a per-file random draw (seeded from the
    # last character of the id so reruns are reproducible) or the fixed max.
    if randomize:
        seed = ord(file_id[-1])
        random.seed(seed)
        max_length = random.randint(max_dur[0], max_dur[1])
    else:
        max_length = max_dur[1]
    logger.info("Processing file id {}...".format(file_id))

    # grab audio file from s3
    mp3 = os.path.join(mp3_dir, "{}.mp3".format(file_id))
    wav = os.path.join(mp3_dir, "{}.wav".format(file_id))
    if not os.path.isfile(wav):
        if not os.path.isfile(mp3):
            bucket = boto3.resource("s3").Bucket("cgws")
            logger.info("Downloading file {} from S3...".format(file_id))
            try:
                bucket.download_file("{}.mp3".format(file_id), mp3)
            except:  # NOTE(review): bare except also swallows KeyboardInterrupt
                logger.warning(
                    "Could not download file {} from S3.".format(file_id))
                return
        # Convert to 16 kHz mono wav with sox, discarding sox's own output.
        FNULL = open(os.devnull, 'w')
        subprocess.call([
            "sox", "{}".format(mp3), "-r", "16k", "{}".format(wav), "remix",
            "-"
        ],
                        stdout=FNULL,
                        stderr=FNULL)

    # transcript
    txt_file = os.path.join(records_dir, "{}.txt".format(file_id))
    logger.info("Reading transcript {}...".format(file_id))
    try:
        with open(txt_file, "r") as tr:
            transcript = tr.read()
    except IOError:
        logger.warning("File {} does not exist.".format(txt_file))
        return

    # split transcript by speaker, and get timestamps (as seconds)
    # of the boundaries of each paragraph
    logger.info("Splitting transcript by speaker...")
    paragraphs = []
    times = []
    for paragraph in transcript.split("\n"):
        # Paragraphs that begin with an h:mm:ss.d timestamp mark a speaker turn.
        catch = re.match("\d:\d+:\d+\.\d", paragraph)
        if catch:
            timestamp = catch.group()
            h, m, s = timestamp.split(":")
            time = int(h) * 60 * 60 + int(m) * 60 + float(s)
            paragraphs.append(paragraph)
            times.append(time)
    # Sentinel end time so times[i + 1] is valid for the final paragraph.
    file_end = get_duration(mp3)
    times.append(file_end)
    total_captures, captures_dur = 0, 0

    # taking one speaker at a time, find unbroken alignments up to max_length
    # and write out corresponding files
    for i, paragraph in enumerate(paragraphs):
        logger.info("Cleaning and trimming paragraph {}: \n{}".format(
            i, paragraph))
        paragraph_start, paragraph_end = times[i], times[i + 1]
        # don't bother with short files
        if paragraph_end - paragraph_start < min_dur:
            logger.info("Skipping paragraph {} (too short)...".format(i))
            continue
        if len(paragraph.split()) < 2:
            logger.info("Skipping paragraph {} (too few words)...".format(i))
            continue
        # Clip this speaker's span out of the full wav into /tmp.
        temp_wav = trim(file_id, wav, paragraph_start, paragraph_end, 0,
                        "/tmp")
        # unique name of json object to read/write
        # NOTE(review): sha1 of a str only works on Python 2; Python 3 needs
        # the formatted string encoded to bytes first -- confirm runtime.
        paragraph_hash = hashlib.sha1("{}{}{}{}".format(
            file_id, paragraph, paragraph_start, paragraph_end)).hexdigest()
        if use_filename_json is True:
            json_file = os.path.join(
                json_out_dir,
                "{}_{}_{}.json".format(file_id, paragraph_start,
                                       paragraph_end))
        else:
            json_file = os.path.join(json_out_dir,
                                     "{}.json".format(paragraph_hash))
        result = None
        # check if json object has been written from a previous run
        if not os.path.isfile(json_file):
            logger.info(
                "JSON file with hash {} not found.".format(paragraph_hash))
            try:
                logger.info("Resampling paragraph {}...".format(i))
                with gentle.resampled(temp_wav) as wav_file:
                    resources = gentle.Resources()
                    cleaned = clean(paragraph)
                    logger.info(
                        "Aligning paragraph {} with gentle...".format(i))
                    aligner = gentle.ForcedAligner(
                        resources,
                        cleaned,
                        nthreads=multiprocessing.cpu_count(),
                        disfluency=False,
                        conservative=False,
                        disfluencies=set(["uh", "um"]))
                    logger.info(
                        "Transcribing audio segment {} with gentle...".format(
                            i))
                    result = aligner.transcribe(wav_file)
            except:  # NOTE(review): bare except -- any error just skips the paragraph
                logger.warning("Paragraph {} - {} ".format(
                    i, sys.exc_info()[2]))
                os.remove(temp_wav)
                continue
            # Cache the alignment JSON for future runs.
            aligned_words = result.to_json()
            with open(json_file, "w") as f:
                f.write(aligned_words)
            if not result:
                logger.info("Empty result for paragraph {}.".format(i))
                os.remove(temp_wav)
                continue
        else:
            logger.info(
                "Found JSON of paragraph {} -- skipping alignment and transcription by gentle"
                .format(i))

        # dictionary of aligned words
        with open(json_file) as f:
            aligned = json.loads(f.read())

        # save all consecutively captured strings
        # and keep track of their start and stop times
        captures = []
        current, start_time, end_time = [], 0, 0
        # loop through every word as returned from gentle
        logger.info("Capturing strings in paragraph {}...".format(i))
        if not "words" in aligned:
            logger.info("No words in paragraph {}.".format(i))
            os.remove(temp_wav)
            continue
        # first two seconds will be skipped even if it contains a capture
        for catch in aligned["words"]:
            # successful capture
            if catch["case"] == "success" and catch[
                    "alignedWord"] != "<unk>" and catch[
                        'start'] > 5 and catch['end'] - catch['start'] > .07:
                # new capture group
                if not current:
                    # begin capturing if it has been two seconds since the last word
                    if catch["start"] - end_time > 1:
                        current = [catch["alignedWord"]]
                        start_time = catch["start"]
                        end_time = catch["end"]
                # continuation of a capture group
                else:
                    # large gap between last capture and this one
                    # likely that something was missing in the transcript
                    if catch["start"] - end_time > 1:
                        save_capture(captures, start_time, end_time, current)
                        current = []
                    # adding this word would equal or exceed max_length
                    elif catch["end"] - start_time >= max_length:
                        save_capture(captures, start_time, end_time, current,
                                     min_dur)
                        current = []
                        # Re-draw the target length per capture when randomizing.
                        if randomize:
                            max_length = random.randint(
                                max_dur[0], max_dur[1])
                    # continue capturing
                    else:
                        current.append(catch["alignedWord"])
                        end_time = catch["end"]
            # a miss after prior success(es)
            elif current:
                save_capture(captures, start_time, end_time, current, min_dur)
                current = []
        # last word was a success but current capture hasn't been saved yet
        if current:
            save_capture(captures, start_time, end_time, current, min_dur)

        # write strings and split audio into consituent segments
        logger.info(
            "Writing text and audio segments from paragraph {}...".format(i))
        for result in captures:
            # Segment filenames encode absolute start/end in centiseconds,
            # zero-padded to 7 digits.
            txt_segment = os.path.join(
                text_out_dir, "{}_{}_{}.txt".format(
                    file_id,
                    "{:07d}".format(int((times[i] + result["start"]) * 100)),
                    "{:07d}".format(int((times[i] + result["end"]) * 100))))
            with open(txt_segment, "w") as f:
                f.write("{}\n".format(result["string"]))
            segment = trim(file_id, temp_wav, result["start"], result["end"],
                           times[i], wav_out_dir)
            # make sure durations match
            segment_dur = get_duration(segment)
            assert segment_dur - result["duration"] <= .01
            total_captures += 1
            captures_dur += segment_dur
        # delete the clip of this speaker
        os.remove(temp_wav)

    # per-file logging
    total_dur = get_duration(mp3)
    logger.info("Wrote {} segments from {}, totalling {} seconds, out of a possible {}, ratio {:.2f}."\
        .format(total_captures,file_id,captures_dur,total_dur,captures_dur/total_dur))
    return
def generate_diphones(audio_file,
                      transcript_file,
                      output_folder,
                      pre_padding=0.0,
                      post_padding=0.0) -> set:
    """Generates the list of diphones for a given audio_file using the
    transcript and store the diphones in the output_folder

    Args:
        :param audio_file:(str) Name of the audio file to segment (.wav)
        :param transcript_file:(str) Name of the text file with the transcript
        :param output_folder:(str) Name of the destination directory to store the diphones
        :param pre_padding:(float) A fraction of audio to clip before the generated diphone
        :param post_padding:(float) A fraction of audio to clip after the generated diphone

    Returns:
        :return set of generated diphones
    """
    nthreads = multiprocessing.cpu_count()
    disfluency = False
    conservative = False
    disfluencies = {'uh', 'um'}
    with open(transcript_file, encoding="utf-8") as fh:
        transcript = fh.read()
    print(transcript)
    resources = gentle.Resources()
    with gentle.resampled(audio_file) as wavfile:
        aligner = gentle.ForcedAligner(resources,
                                       transcript,
                                       nthreads=nthreads,
                                       disfluency=disfluency,
                                       conservative=conservative,
                                       disfluencies=disfluencies)
        result = aligner.transcribe(wavfile)
    r = json.loads(result.to_json())
    phone_time_list = []
    diphones = set()
    # Walk gentle's word/phone timings, converting seconds to milliseconds,
    # recording each phone as [label, start_ms, end_ms].
    for word in r['words']:
        start = word['start'] * 1000
        for phone in word['phones']:
            diphones.add(phone['phone'])
            phone_time_list.append(
                [phone['phone'], start, start + phone['duration'] * 1000])
            start = start + phone['duration'] * 1000
    for entry in phone_time_list:
        # Cut the phone's span (plus requested padding) out of the audio.
        diphone = segment_audio(audio_file, entry[1], entry[2], pre_padding,
                                post_padding)
        # print('Old ' + str(entry[0]) + ':' + str(len(diphone)))
        # Pad very short clips up to 150 ms so downstream use is consistent.
        if len(diphone) < 150:
            try:
                diphone = ensure_length(diphone, 150)
            except exceptions.CouldntDecodeError:
                print(
                    entry[0],
                    'is very small.........................................................'
                )
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)
        # NOTE(review): one output file per phone label, so a later
        # occurrence of the same label overwrites the earlier clip.
        output_filename = output_folder + '/' + str(entry[0]) + '.wav'
        diphone.export(output_filename, format='wav')
        print('New ' + str(entry[0]) + ':' + str(len(diphone)))
    return diphones
# Filler words gentle may insert/skip during alignment.
disfluencies = set(['uh', 'um'])


def on_progress(p):
    """Log each progress key/value pair reported by gentle at debug level."""
    for k, v in p.items():
        logging.debug("%s: %s" % (k, v))


with open(args.txtfile) as fh:
    transcript = fh.read()

resources = gentle.Resources()

logging.info("converting audio to 8K sampled wav")

with gentle.resampled(args.audiofile) as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(resources,
                                   transcript,
                                   nthreads=args.nthreads,
                                   disfluency=args.disfluency,
                                   conservative=args.conservative,
                                   disfluencies=disfluencies)
    result = aligner.transcribe(wavfile,
                                progress_cb=on_progress,
                                logging=logging)

# Write to the requested output file, or stdout when none was given.
fh = open(args.output, 'w') if args.output else sys.stdout
fh.write(result.to_json(indent=2))
if args.output:
    logging.info("output written to %s" % (args.output))
    # BUG FIX: close the file we opened (never close sys.stdout);
    # the original leaked the handle.
    fh.close()
parser.add_argument('txtfile', type=str, help='transcript text file')

args = parser.parse_args()

log_level = args.log.upper()
logging.getLogger().setLevel(log_level)

# Filler words gentle may insert/skip during alignment.
disfluencies = set(['uh', 'um'])


def on_progress(p):
    """Log each progress key/value pair reported by gentle at debug level."""
    for k, v in p.items():
        logging.debug("%s: %s" % (k, v))


with open(args.txtfile) as fh:
    transcript = fh.read()

resources = gentle.Resources()

logging.info("converting audio to 8K sampled wav")

with gentle.resampled(args.audiofile) as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(resources,
                                   transcript,
                                   nthreads=args.nthreads,
                                   disfluency=args.disfluency,
                                   conservative=args.conservative,
                                   disfluencies=disfluencies)
    result = aligner.transcribe(wavfile,
                                progress_cb=on_progress,
                                logging=logging)

# Write to the requested output file, or stdout when none was given.
fh = open(args.output, 'w') if args.output else sys.stdout
fh.write(result.to_json(indent=2))
if args.output:
    logging.info("output written to %s" % (args.output))
    # BUG FIX: close the file we opened (never close sys.stdout);
    # the original leaked the handle.
    fh.close()
audio_path = os.path.join(DIR_PATH, "audio", audio_name) transcript_path = os.path.join(DIR_PATH, "transcripts", transcript_name) if not os.path.isfile(transcript_path): continue # get transcript text transcript_text = "" with open(transcript_path) as f: transcript_text = f.read() # run Gentle print "Running Gentle on", transcript_name resources = gentle.Resources() with gentle.resampled(audio_path) as wavfile: aligner = gentle.ForcedAligner(resources, transcript_text) result = aligner.transcribe(wavfile).words # create gentle_results directory if it doesn't already exist # usually better to use try-catch here, but not worried about race conditions right now if not os.path.exists(os.path.join(DIR_PATH, "gentle_results")): os.makedirs(os.path.join(DIR_PATH, "gentle_results")) # write Gentle output to gentle_results directory with open( os.path.join(DIR_PATH, "gentle_results", transcript_name + ".txt"), "w") as f: output = [] for word in result: output.append({
# Extract every non-disclaimer speaker fragment from the transcript XML into
# the flat transcript text file.
# NOTE(review): `target_file_txt` must already be open for writing at this
# point (opened outside this span) -- it is reopened for reading just below.
parsed_source_xml = ET.parse(source_path_xml)
parsed_root = parsed_source_xml.getroot()
for turn in parsed_root.findall('.//vx:Turn', namespaces):
    if 'DISCLAIMER' != turn.attrib['Speaker']:
        for fragment in turn.findall('.//vx:Fragment', namespaces):
            target_file_txt.write(fragment.text)

# Read back the flattened transcript text.
with open(target_path_txt) as target_file_txt:
    transcript = target_file_txt.read()

# The matching audio lives under audio/ with an .mp3 extension.
source_path_mp3 = source_path_xml.replace('transcripts/extracted',
                                          'audio').replace('.xml', '.mp3')
# Only align when both the audio exists and the transcript is non-empty.
if os.path.isfile(source_path_mp3) and transcript:
    target_path_json = target_path_txt.replace('.txt', '.json')
    with open(target_path_json, 'w') as target_file_json:
        print('converting audio to 8K sampled wav')
        with gentle.resampled(source_path_mp3) as wavfile:
            print('starting alignment for', source_path_xml, ' and ',
                  source_path_mp3)
            aligner = gentle.ForcedAligner(resources,
                                           transcript,
                                           nthreads=nthreads,
                                           disfluency=False,
                                           conservative=False,
                                           disfluencies=disfluencies)
            result = aligner.transcribe(wavfile,
                                        progress_cb=on_progress,
                                        logging=logging)
            target_file_json.write(result.to_json(indent=2))
            print('finished alignment in', target_path_json)
def on_progress(p):
    # Log each progress key/value pair reported by gentle at debug level.
    for k, v in p.items():
        logging.debug("%s: %s" % (k, v))


# Base name (no extension) of the paired .txt transcript and .wav audio.
file_name = 'dave/she_will_eat_rice_tomorrow'
with open(file_name + '.txt', encoding="utf-8") as fh:
    transcript = fh.read()
resources = gentle.Resources()
logging.info("converting audio to 8K sampled wav")
with gentle.resampled(file_name + '.wav') as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(resources,
                                   transcript,
                                   nthreads=nthreads,
                                   disfluency=disfluency,
                                   conservative=conservative,
                                   disfluencies=disfluencies)
    result = aligner.transcribe(wavfile,
                                progress_cb=on_progress,
                                logging=logging)
r = json.loads(result.to_json())
# Per-word [word, start_s, end_s] and per-phone [phone, start_ms, end_ms].
dur_list = []
phone_list = []
clip = AudioSegment.from_wav(file_name + '.wav')
for word in r['words']:
    dur_list.append([word['alignedWord'], word['start'], word['end']])
    start = word['start'] * 1000
    for phone in word['phones']:
        # NOTE(review): `start` is not advanced between phones in this view
        # (compare generate_diphones elsewhere in this file); the source may
        # be truncated here -- confirm against the original.
        phone_list.append(
            [phone['phone'], start, start + phone['duration'] * 1000])
if __name__ == '__main__':
    # full transcription
    from Queue import Queue  # NOTE: Python 2 module (named `queue` on Python 3)
    import json
    import sys
    import logging
    logging.getLogger().setLevel('INFO')

    import gentle
    from gentle import standard_kaldi

    resources = gentle.Resources()

    # Pool of kaldi decoder instances shared by the transcriber threads.
    k_queue = Queue()
    for i in range(3):
        k_queue.put(
            standard_kaldi.Kaldi(resources.nnet_gpu_path,
                                 resources.full_hclg_path,
                                 resources.proto_langdir))

    trans = MultiThreadedTranscriber(k_queue)

    with gentle.resampled(sys.argv[1]) as filename:
        out = trans.transcribe(filename)

    # BUG FIX: close the output file deterministically; the original relied
    # on the anonymous handle from open(...).write(...) being garbage-collected.
    with open(sys.argv[2], 'w') as f:
        f.write(transcription.Transcription(words=out).to_json())
paragraph_end)) if os.path.isfile(new_json_file): if args.abort: print(" aborting") break else: if not os.path.isfile(json_file): temp_wav = trim(file_id, wav, paragraph_start, paragraph_end, 0, "/tmp") if not os.path.isfile(temp_wav): continue try: with gentle.resampled(temp_wav) as wav_file: resources = gentle.Resources() cleaned = clean(paragraph) aligner = gentle.ForcedAligner( resources, cleaned, nthreads=multiprocessing.cpu_count() * args.threads_multiplier, disfluency=False, conservative=False, disfluencies=set(["uh", "um"])) result = aligner.transcribe(wav_file) aligned_words = result.to_json() with open(json_file, "w") as f: f.write(aligned_words)