Ejemplo n.º 1
0
def cronista_transcribe(audio_source_path: str,
                        destination_folder: str,
                        block_of_transcription: int,
                        lang: str,
                        on_transcription_progress=None,
                        to_file: bool = True):
    """Run the two-stage pipeline: ``split`` the audio, then ``transcribe`` it.

    Args:
        audio_source_path: Forwarded to ``split`` as its first argument.
        destination_folder: Forwarded to ``split`` as its second argument.
        block_of_transcription: Forwarded to ``split`` as its third argument
            (presumably the size of each block -- confirm against ``split``).
        lang: Forwarded to ``transcribe`` along with the segments.
        on_transcription_progress: Optional value forwarded to ``transcribe``
            (presumably a progress callback -- confirm against ``transcribe``).
        to_file: Forwarded to ``split`` as its fourth argument.

    Returns:
        None. Whatever ``transcribe`` returns is not propagated.
    """
    # Stage 1: obtain the segments from the splitter.
    segments = split(
        audio_source_path,
        destination_folder,
        block_of_transcription,
        to_file,
    )
    # Stage 2: hand the segments to the transcriber.
    transcribe(segments, lang, on_transcription_progress)
Ejemplo n.º 2
0
    def browse(self):
        '''Ask for a file, transcribe it and display its word cloud.

        The method returns early when the dialog is cancelled or when the
        chosen file name ends with '.txt' (only the path label is updated in
        that case). Otherwise it shows the "WORKING" label and the progress
        bar, runs the transcription, builds the word cloud and finally
        restores the idle UI.

        The idle UI is restored in a ``finally`` clause so that a failure in
        any processing step does not leave the "WORKING" label and a running
        progress bar on screen; the exception itself still propagates.
        '''
        # NOTE(review): if wordlabel is a widget this comparison is probably
        # always true -- confirm whether it can ever be the empty string.
        if self.wordlabel != "":  # clear the labels if necessary
            self.wordlabel.config(text="")
            self.pathlabel.config(text="")
            self.timestamplabel.config(text="")
            self.estimatelabel.config(text="")

        self.filename = askopenfilename()  # open-file dialog
        if not self.filename:  # leave method if cancel is clicked
            return

        self.pathlabel.config(text=basename(self.filename))  # show filename
        if self.filename.endswith('.txt'):
            # Files with a .txt name are not transcribed.
            return

        # Show WORKING and the progress bar while transcribing.
        self.workinglabel.config(text="WORKING", font=("Helvetica", 20))
        self.pbar_det.pack()
        self.pbar_det.start()
        try:
            # Refresh the window before the processing calls below
            # (root is presumably the Tk root window -- confirm).
            root.update()
            self.transcription = transcribe(self.filename)

            # The word cloud is built from the transcription as returned by
            # transcribe(), before removerlap() post-processes it.
            wordcloud_path = wordcloud_create(self.transcription)
            self.transcription = removerlap(self.transcription.split(' '))
            self.new_image(wordcloud_path)
        finally:
            # Always return to the idle state, even if a step above raised.
            self.workinglabel.config(text="")  # remove working label
            self.pbar_det.stop()  # stop progress bar
            self.pbar_det.pack_forget()  # remove progress bar
Ejemplo n.º 3
0
def transcribe_into_paragraphs(wavfile_path, model_dir, minute_increment,
                               log_stream):
    """Transcribe a WAV file and group its sentences into timed paragraphs.

    ``transcriber.transcribe`` is expected to yield ``(timestamp, sentence)``
    pairs (timestamps presumably in seconds, since the increment is converted
    with ``* 60`` -- confirm against the transcriber). A paragraph is closed
    by the first sentence whose timestamp lies beyond the current boundary;
    that sentence is still part of the paragraph it closes.

    Args:
        wavfile_path: Path of the audio file, forwarded to the transcriber.
        model_dir: Forwarded to the transcriber.
        minute_increment: Paragraph length in minutes; must be positive.
        log_stream: Path of a log file that is opened in append mode; also
            forwarded to the transcriber.

    Returns:
        A list of paragraph strings. Every sentence is preceded by a single
        space, so each paragraph starts with a space. Sentences after the
        last boundary are returned as a final paragraph.

    Raises:
        ValueError: If ``minute_increment`` is not positive.
    """
    # Validate first: a non-positive step can never advance the boundary.
    if minute_increment <= 0:
        raise ValueError(
            'minute_increment must be positive, got %r' % (minute_increment,))

    with open(log_stream, 'a') as f:
        f.write('\nBeginning transcription\n')
    logging.info('Beginning transcription: %s', wavfile_path)

    sentences = transcriber.transcribe(wavfile_path, model_dir, log_stream)

    step = minute_increment * 60
    boundary = step
    paragraphs = []
    pending = []
    for timestamp, sentence in sentences:
        pending.append(sentence)

        if timestamp > boundary:
            paragraphs.append(' ' + ' '.join(pending))
            pending = []
            # Advance by whole steps (not by doubling) until the boundary is
            # no longer behind the current timestamp, so a long gap in the
            # audio does not produce a run of one-sentence paragraphs.
            while boundary < timestamp:
                boundary += step

    # Keep the sentences that follow the last boundary.
    if pending:
        paragraphs.append(' ' + ' '.join(pending))

    return paragraphs
Ejemplo n.º 4
0
def transcription_to_str(file, time_slot):
    """Return the transcription of ``file`` as a quoted text / time-slot pair.

    The pieces returned by ``transcribe(file_name=file)`` are concatenated
    without a separator, wrapped in double quotes and followed by
    ``", " + time_slot``.
    """
    joined_text = ''.join(transcribe(file_name=file))
    return '"{}", {}'.format(joined_text, time_slot)
            for line in infile:
                line = line.rstrip()
                line=line.replace("<sil>", "");
                wordSet.add(line)

wordSet.remove("")

wordList = sorted(list(wordSet))

wordsStr = "\n".join(wordList)
print ">>>> wordsStr\n" + wordsStr

#automagical transformation
transcriber = transcriber.Transcriber()

phonemeSet = set([])

with open("../target/liepa.dic", "wb") as outfile:
    for i in range(len(wordList)):
        key = wordList[i]
        value = transcriber.transcribe(key)

        phonemeSet.update(value.split(" "))
        #print key +"\t"+ value
        outfile.write(key +"\t"+ value + "\n")


with open("../target/liepa.phone", "wb") as outfile:
    outfile.write("SIL\n")
    for phone in sorted(phonemeSet):
        outfile.write(phone + "\n")
Ejemplo n.º 6
0
def transcription_to_str(file, time_slot):
    """Return the transcription of ``file`` together with its time slot.

    The result is a dict with two keys: ``'time_slot'`` holds ``time_slot``
    unchanged and ``'str'`` holds the pieces returned by
    ``transcribe(file_name=file)`` concatenated without a separator.
    """
    joined_text = ''.join(transcribe(file_name=file))
    return {'time_slot': time_slot, 'str': joined_text}
Ejemplo n.º 7
0
                play("beep_lo.wav")
            if 'print' in args.ui_out:
                print 'Stopped recording, saving as file #{0} ...'.format(counter)
            file_out = os.path.join(dir_out, str(counter) + '.wav')
            save_as_wave(file_out, frames, args.channels, args.rate)
            counter += 1
    except EOFError:
        destroy()
        print ''
    except KeyboardInterrupt:
        destroy()
        print ''

    data = get_all_data(dir_out, counter)

    if data:
        file_out = os.path.join(dir_out, 'all.wav')
        save_data_as_wave(file_out, data, counter)
        if args.transcribe:
            # TODO: make more configurable
            file_out_transcription = os.path.join(dir_out, 'all.txt')
            transcription = transcriber.transcribe(file_out, args.uri_transcriber, '', 32000, None, None)
            with open(file_out_transcription, 'w') as text_file:
                #print(transcription, file=text_file)
                text_file.write(transcription)
        if args.playback:
            play(file_out)

    if args.nokeep:
        shutil.rmtree(dir_out)