def write_into_subtitle(response, output_path, words_per_line=10):
    """Write the recognised words to ``subtitle-no-punctuation.srt``.

    The punctuation found in ``<output_path>transcript-text.txt`` is
    mirrored in the subtitle text as a single space: whenever the
    transcript has one or more punctuation marks right after a word, a
    space is appended after that word in the cue.

    NOTE(review): another function with the same name is defined later in
    this file and shadows this one at import time -- confirm which of the
    two is meant to be used.

    Args:
        response: Object exposing ``results``; for every result only
            ``alternatives[0].words`` is read, and for every word its
            ``word``, ``start_time`` and ``end_time`` (both with
            ``seconds`` and ``nanos``).  Presumably a speech-recognition
            response -- confirm against the caller.
        output_path: Prefix that is concatenated verbatim with the file
            names, so a directory must end with a path separator.
        words_per_line: Maximum number of words per cue (default 10).

    Raises:
        ValueError: If ``words_per_line`` is smaller than 1.

    Fixes over the previous implementation:
        * no ``IndexError`` once the punctuation list is exhausted or
          when the transcript has no punctuation at all;
        * consecutive or leading punctuation marks no longer
          desynchronise the word/punctuation alignment;
        * no empty cue when a result holds an exact multiple of
          ``words_per_line`` words;
        * a cue starts at the start time of its own first word instead of
          the start time of the previous cue's last word.
    """
    if words_per_line < 1:
        raise ValueError('words_per_line must be at least 1')

    print('Waiting for writing subtitle to complete...')

    # Index every non-whitespace character of the transcript.  A character
    # that is neither alphabetic nor in string.whitespace is treated as
    # punctuation (note that this includes digits) and its index is kept.
    with codecs.open(output_path + 'transcript-text.txt', 'r', 'utf-8') as reader:
        transcript = reader.read()
    punctuation_slots = set()
    slot = 0
    for char in transcript:
        if char.isalpha():
            slot += 1
        elif char not in string.whitespace:
            punctuation_slots.add(slot)
            slot += 1

    def timestamp(moment):
        # timestr.timefm presumably renders seconds as an SRT timestamp --
        # confirm against the timestr module.
        return timestr.timefm(moment.seconds + moment.nanos * 1e-9)

    # Number of transcript slots (letters + punctuation) matched so far.
    # Assumes every recognised word maps to exactly one alphabetic
    # character of the transcript -- TODO confirm for multi-character words.
    consumed = 0
    while consumed in punctuation_slots:
        # Punctuation that precedes the very first word.
        consumed += 1

    with codecs.open(output_path + 'subtitle-no-punctuation.srt', 'w', 'utf-8') as writer:
        sequence = 1  # SRT cue number
        for result in response.results:
            words = result.alternatives[0].words
            last_position = len(words) - 1
            line = ''
            words_in_line = 0
            line_start = None
            for position, word_info in enumerate(words):
                if words_in_line == 0:
                    # First word of a new cue fixes the cue's start time.
                    line_start = timestamp(word_info.start_time)
                line += word_info.word
                words_in_line += 1
                consumed += 1

                if consumed in punctuation_slots:
                    # Swallow every punctuation mark that follows this
                    # word, but separate with one space only.
                    while consumed in punctuation_slots:
                        consumed += 1
                    line += ' '

                # Flush when the cue is full or the result has no more
                # words; a cue is never flushed while it is empty.
                if words_in_line >= words_per_line or position == last_position:
                    line_end = timestamp(word_info.end_time)
                    writer.write(str(sequence) + '\n')
                    writer.write(line_start + ' --> ' + line_end + '\n')
                    writer.write(line)
                    writer.write('\n\n')
                    sequence += 1
                    line = ''
                    words_in_line = 0
def write_into_subtitle(response, output_path, words_per_line=10):
    """Write the recognised words to ``<output_path>subtitle-raw.srt``.

    The words of every result are concatenated without separators and
    split into cues of at most ``words_per_line`` words.  A cue never
    spans two results.

    NOTE(review): an earlier function with the same name exists in this
    file; this definition shadows it at import time -- confirm that is
    intended.

    Args:
        response: Object exposing ``results``; for every result only
            ``alternatives[0].words`` is read, and for every word its
            ``word``, ``start_time`` and ``end_time`` (both with
            ``seconds`` and ``nanos``).  Presumably a speech-recognition
            response -- confirm against the caller.
        output_path: Prefix that is concatenated verbatim with the file
            name, so a directory must end with a path separator.
        words_per_line: Maximum number of words per cue (default 10).

    Raises:
        ValueError: If ``words_per_line`` is smaller than 1.

    Fixes over the previous implementation:
        * no empty cue when a result holds an exact multiple of
          ``words_per_line`` words;
        * a cue starts at the start time of its own first word instead of
          the start time of the previous cue's last word.
    """
    if words_per_line < 1:
        raise ValueError('words_per_line must be at least 1')

    print('Waiting for writing subtitle to complete...')

    def timestamp(moment):
        # timestr.timefm presumably renders seconds as an SRT timestamp --
        # confirm against the timestr module.
        return timestr.timefm(moment.seconds + moment.nanos * 1e-9)

    with codecs.open(output_path + 'subtitle-raw.srt', 'w', 'utf-8') as writer:
        sequence = 1  # SRT cue number
        for result in response.results:
            words = result.alternatives[0].words
            last_position = len(words) - 1
            line = ''
            words_in_line = 0
            line_start = None
            for position, word_info in enumerate(words):
                if words_in_line == 0:
                    # First word of a new cue fixes the cue's start time.
                    line_start = timestamp(word_info.start_time)
                line += word_info.word
                words_in_line += 1

                # Flush when the cue is full or the result has no more
                # words; a cue is never flushed while it is empty.
                if words_in_line >= words_per_line or position == last_position:
                    line_end = timestamp(word_info.end_time)
                    writer.write(str(sequence) + '\n')
                    writer.write(line_start + ' --> ' + line_end + '\n')
                    writer.write(line)
                    writer.write('\n\n')
                    sequence += 1
                    line = ''
                    words_in_line = 0