예제 #1
0
def write_into_subtitle(response, output_path, words_per_line=10):
    """Write recognized words to an SRT file, spacing them at punctuation.

    Reads ``output_path + 'transcript-text.txt'`` to find where the
    punctuation marks sit in the transcript, then writes
    ``output_path + 'subtitle-no-punctuation.srt'``. Words are concatenated
    without separators; a single space is inserted wherever the transcript
    had punctuation after the word.

    Args:
        response: Object exposing ``results``. The first entry of each
            result's ``alternatives`` must expose ``words``, whose items
            carry ``word``, ``start_time`` and ``end_time`` (the two times
            having ``seconds`` and ``nanos``).
            NOTE(review): presumably a Google Cloud Speech-to-Text response
            requested with word time offsets -- confirm against the caller.
        output_path: Prefix prepended verbatim to both file names.
        words_per_line: Maximum number of words per subtitle cue. A cue is
            also closed at the end of every result.
    """
    print('Waiting for writing subtitle to complete...')

    def to_timestamp(offset):
        # Collapse the seconds/nanos pair into float seconds before handing
        # it to timestr.timefm (defined outside this block).
        return timestr.timefm(offset.seconds + offset.nanos * 1e-9)

    # read the chinese punctuation: record the position of every character
    # that is neither alphabetic nor ASCII whitespace, counting positions
    # over the non-whitespace characters only.
    with codecs.open(output_path + 'transcript-text.txt', 'r',
                     'utf-8') as reader:
        transcript = reader.read()
    punc_index_list = []
    punc_index = 0
    for char in transcript:
        if char.isalpha():
            punc_index += 1
        elif char not in string.whitespace:
            punc_index_list.append(punc_index)
            punc_index += 1
    punc_total = len(punc_index_list)

    with codecs.open(output_path + 'subtitle-no-punctuation.srt', 'w',
                     'utf-8') as writer:
        seq = 1  # sequence number of the next srt cue
        # Position reached in the transcript. It advances by one per
        # recognized word, so it lines up with punc_index_list only when
        # each word corresponds to one transcript character.
        word_index = 0
        curr = 0  # index of the next unconsumed punctuation position
        for result in response.results:
            alternative = result.alternatives[0]
            pieces = []  # text fragments of the cue being built
            counter = 0  # words already placed in the current cue
            remaining = len(alternative.words)  # words left in this result
            start_next_para = True
            for word_info in alternative.words:
                word_index += 1
                remaining -= 1
                counter += 1
                if start_next_para:
                    # Every cue starts at its own first word, so that
                    # consecutive cues never overlap in time.
                    str_start = to_timestamp(word_info.start_time)
                    start_next_para = False

                pieces.append(word_info.word)
                # The bounds check keeps a transcript with no (or too few)
                # punctuation marks from raising IndexError.
                if curr < punc_total and word_index == punc_index_list[curr]:
                    pieces.append(' ')
                    # Consume the whole run of adjacent punctuation marks;
                    # otherwise the alignment would never match again.
                    while (curr < punc_total
                           and word_index == punc_index_list[curr]):
                        curr += 1
                        word_index += 1

                # Flush exactly once: either the cue is full or the result
                # has no more words. An empty cue is never written.
                if counter >= words_per_line or remaining == 0:
                    str_end = to_timestamp(word_info.end_time)
                    writer.write('{}\n{} --> {}\n{}\n\n'.format(
                        seq, str_start, str_end, ''.join(pieces)))
                    seq += 1
                    pieces = []
                    counter = 0
                    start_next_para = True
예제 #2
0
def write_into_subtitle(response, output_path, words_per_line=10):
    """Write recognized words to ``output_path + 'subtitle-raw.srt'``.

    Words are concatenated without separators and grouped into SRT cues of
    at most ``words_per_line`` words. A cue is also closed at the end of
    every result, so no cue spans two results.

    Args:
        response: Object exposing ``results``. The first entry of each
            result's ``alternatives`` must expose ``words``, whose items
            carry ``word``, ``start_time`` and ``end_time`` (the two times
            having ``seconds`` and ``nanos``).
            NOTE(review): presumably a Google Cloud Speech-to-Text response
            requested with word time offsets -- confirm against the caller.
        output_path: Prefix prepended verbatim to the output file name.
        words_per_line: Maximum number of words per subtitle cue.
    """
    print('Waiting for writing subtitle to complete...')

    def to_timestamp(offset):
        # Collapse the seconds/nanos pair into float seconds before handing
        # it to timestr.timefm (defined outside this block).
        return timestr.timefm(offset.seconds + offset.nanos * 1e-9)

    with codecs.open(output_path + 'subtitle-raw.srt', 'w', 'utf-8') as writer:
        seq = 1  # sequence number of the next srt cue
        for result in response.results:
            alternative = result.alternatives[0]
            pieces = []  # words of the cue being built
            counter = 0  # words already placed in the current cue
            remaining = len(alternative.words)  # words left in this result
            start_next_para = True
            for word_info in alternative.words:
                remaining -= 1
                counter += 1
                if start_next_para:
                    # Every cue starts at its own first word, so that
                    # consecutive cues never overlap in time.
                    str_start = to_timestamp(word_info.start_time)
                    start_next_para = False

                pieces.append(word_info.word)

                # Flush exactly once: either the cue is full or the result
                # has no more words. An empty cue is never written.
                if counter >= words_per_line or remaining == 0:
                    str_end = to_timestamp(word_info.end_time)
                    writer.write('{}\n{} --> {}\n{}\n\n'.format(
                        seq, str_start, str_end, ''.join(pieces)))
                    seq += 1
                    pieces = []
                    counter = 0
                    start_next_para = True