コード例 #1
0
def json_to_srt(deepspeech_json, max_word_time=10, min_sub_time=1.5, max_sub_time=3):
    """Convert a DeepSpeech word-level JSON transcript into a SubRipFile.

    Words are grouped into subtitles spanning at most ``max_sub_time``
    seconds; each subtitle is shown for at least ``min_sub_time`` seconds.
    Words whose own duration is ``max_word_time`` seconds or longer are
    treated as recognition glitches and skipped.

    :param deepspeech_json: dict with a "words" list; each entry carries
        "word", "start_time" and "duration" (all times in seconds).
    :param max_word_time: skip words at least this long (seconds).
    :param min_sub_time: minimum display time for one subtitle (seconds).
    :param max_sub_time: maximum time span of one subtitle (seconds).
    :return: a populated ``SubRipFile`` (item times in milliseconds).
    """
    index = 0
    subtitle = ""
    start_time = 0
    end_time = 0
    subtitles = SubRipFile()

    for word in deepspeech_json["words"]:
        word["end_time"] = word["start_time"] + word["duration"]
        if word["duration"] >= max_word_time:
            # Implausibly long word: drop it.
            continue

        if subtitle and start_time + max_sub_time >= word["end_time"]:
            # Word still fits inside the current subtitle window.
            subtitle += " " + word["word"]
            end_time = max(word["end_time"], start_time + min_sub_time)
        elif subtitle:
            # Window full: flush the current subtitle.
            # BUG FIX: the original used ``++index``, which in Python is a
            # double unary plus (a no-op), so every item was appended with
            # index 0; use a real increment so items are numbered 1, 2, ...
            index += 1
            subtitles.append(
                SubRipItem(index=index, start=int(start_time*1000),
                           end=int(end_time*1000), text=subtitle))
            subtitle = ""

        if not subtitle:
            # Start a new subtitle with this word.
            start_time = word["start_time"]
            subtitle += word["word"]
            end_time = max(word["end_time"], start_time + min_sub_time)

    if subtitle:
        # Flush the trailing, not-yet-emitted subtitle.
        index += 1
        subtitles.append(SubRipItem(index=index, start=int(start_time*1000),
                                    end=int(end_time*1000), text=subtitle))
    return subtitles
コード例 #2
0
    def process(self, subs: SubRipFile, items: List[PgsSubtitleItem],
                post_process, confidence: int, max_width: int):
        """OCR a batch of PGS subtitle items and append accepted cues to *subs*.

        All item bitmaps are packed into one large image so tesseract is
        invoked once per batch instead of once per item.

        :param subs: SubRipFile that receives the accepted cues.
        :param items: PGS subtitle items to recognise in this pass.
        :param post_process: optional callable applied to each accepted text.
        :param confidence: minimum OCR confidence for accepting an item.
        :param max_width: maximum width of the packed image.
        :return: list of items not accepted at this confidence (for retry).
        """
        # Pack all item images into a single canvas separated by self.gap.
        full_image = FullImage.from_items(items, self.gap, max_width)

        # --psm 11: "sparse text" page segmentation mode.
        config = {'output_type': tess.Output.DICT, 'config': '--psm 11'}

        if self.pgs.language:
            # Use the three-letter (alpha3) code tesseract expects.
            config.update({'lang': self.pgs.language.alpha3})

        if self.omp_thread_limit:
            # Cap tesseract's OpenMP thread count via the environment.
            os.environ['OMP_THREAD_LIMIT'] = str(self.omp_thread_limit)
        # cv2.imwrite(f'{subs.path}-{len(items)}-{confidence}.png', full_image.data)
        data = TsvData(tess.image_to_data(full_image.data, **config))

        remaining = []
        for item in items:
            text = self.accept(data, item, confidence)
            if text is None:
                # Not confident enough: hand back for another pass.
                remaining.append(item)
                continue

            # NOTE(review): this discards the value accept() just returned and
            # reads item.text instead — presumably accept() stores its result
            # on the item; confirm, otherwise the assignment above is dead.
            text = item.text
            if post_process:
                text = post_process(text)
            if text:
                item = SubRipItem(0, item.start, item.end, text)
                subs.append(item)

        return remaining
コード例 #3
0
def to_srt(df, filename):
    """Write the rows of *df* (columns 'begin', 'end', 'text') to *filename*
    as a UTF-8 SRT file."""
    srt_file = SubRipFile(encoding='utf-8')
    for _, row in df.iterrows():
        item = SubRipItem(0,
                          convert_time(row['begin']),
                          convert_time(row['end']),
                          row['text'])
        srt_file.append(item)
    srt_file.save(filename)
コード例 #4
0
ファイル: videos.py プロジェクト: eres805/congress
def get_captions(client_name, clip_id):
    # Fetch the Granicus caption JSON for one clip, convert it to an SRT file
    # on disk, and push the file to S3.
    # Returns (captions, s3_url) on success, ([], '') on a non-200 response,
    # or None when the HTTP request raises BadStatusLine.
    # NOTE: Python 2 source (print statement, `unicode` builtin).
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % ( client_name, clip_id)
    print "Fetching URL: %s" % g_url

    try:
        response, j = h.request(g_url)
    except httplib.BadStatusLine as exception:
        return None

    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()

    if response.get('status') == '200':
        captions = []
        try:
            j = json.loads(j, strict=False)[0]
        except ValueError:
            # The feed sometimes ships unquoted object keys; quote them and
            # strip stray backslashes, then retry the parse.
            ts = re.sub('([{,]\s+)([a-z]+)(: ")', lambda s: '%s"%s"%s' % (s.groups()[0], s.groups()[1], s.groups()[2]), j).replace("\\", "")
            try:
                j = json.loads(ts, strict=False)[0]
            except UnicodeDecodeError:
                # Drop undecodable bytes and parse once more.
                ts = unicode(ts, errors='ignore')
                j = json.loads(ts, strict=False)[0]
        except:
            # NOTE(review): bare except silently gives up on any other error;
            # the loop below then iterates over `False` and raises TypeError.
            j = False

        sub_count = 0
        for item in j:
            if item["type"] == "text":
                cap = item["text"]
                offset = round(float(item["time"]), 3)
                captions.append({'time': offset, 'text': cap})
                # A caption's end is where the next text item begins.
                end = get_cap_end(j, sub_count)
                if end:
                    subtitle = SubRipItem(index=sub_count, start=SubRipTime(seconds=offset), end=SubRipTime(seconds=end), text=cap)
                    subs.append(subtitle)

            sub_count = sub_count + 1

        try:
            subs.save(path=filename, encoding="utf-8")
        except IOError:
            # Destination directory may not exist yet: create it and retry.
            p = subprocess.Popen('mkdir -p %s' % dirname, shell=True, stdout=subprocess.PIPE)
            t = p.wait()

            subs.save(path=filename, encoding="utf-8")

        s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
        return (captions, s3_url)
    else:
        return ([], '')
コード例 #5
0
def get_captions(client_name, clip_id):
    # Fetch Granicus caption JSON for a clip, write it as SRT, upload to S3.
    # Returns (captions, s3_url) on success or ([], '') on a non-200 status.
    # NOTE: Python 2 source (print statement, `unicode` builtin).
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % ( client_name, clip_id)
    print "Fetching URL: %s" % g_url
    # NOTE(review): no error handling here — h.request can raise on a bad
    # status line or connection failure and the exception will propagate.
    response, j = h.request(g_url)
    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()

    if response.get('status') == '200':
        captions = []
        try:
            j = json.loads(j, strict=False)[0]
        except ValueError:
            # Quote the feed's unquoted keys and strip backslashes, then retry.
            ts = re.sub('([{,]\s+)([a-z]+)(: ")', lambda s: '%s"%s"%s' % (s.groups()[0], s.groups()[1], s.groups()[2]), j).replace("\\", "")
            try:
                j = json.loads(ts, strict=False)[0]
            except UnicodeDecodeError:
                ts = unicode(ts, errors='ignore')
                j = json.loads(ts, strict=False)[0]
        except:
            # NOTE(review): bare except; iterating `False` below will raise.
            j = False

        sub_count = 0
        for item in j:
            if item["type"] == "text":
                cap = item["text"]
                offset = round(float(item["time"]), 3)
                captions.append({'time': offset, 'text': cap})
                # End of this caption is the start of the next text item.
                end = get_cap_end(j, sub_count)
                if end:
                    subtitle = SubRipItem(index=sub_count, start=SubRipTime(seconds=offset), end=SubRipTime(seconds=end), text=cap)
                    subs.append(subtitle)

            sub_count = sub_count + 1

        try:
            subs.save(path=filename, encoding="utf-8")
        except IOError:
            # Create the missing destination directory, then retry the save.
            p = subprocess.Popen('mkdir -p %s' % dirname, shell=True, stdout=subprocess.PIPE)
            t = p.wait()

            subs.save(path=filename, encoding="utf-8")

        s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
        return (captions, s3_url)
    else:
        return ([], '')
コード例 #6
0
ファイル: text.py プロジェクト: hugovk/pliers
    def save(self, path):
        """Persist the elements to *path*.

        Paths ending in 'srt' are written as a SubRip file; anything else is
        written as a tab-separated onset/text/duration table.
        """
        if not path.endswith('srt'):
            with open(path, 'w') as handle:
                handle.write('onset\ttext\tduration\n')
                for element in self._elements:
                    handle.write('{}\t{}\t{}\n'.format(element.onset,
                                                       element.text,
                                                       element.duration))
            return

        verify_dependencies(['pysrt'])
        from pysrt import SubRipFile, SubRipItem
        from datetime import time

        srt = SubRipFile()
        for element in self._elements:
            begin = time(*self._to_tup(element.onset))
            finish = time(*self._to_tup(element.onset + element.duration))
            srt.append(SubRipItem(0, begin, finish, element.text))
        srt.save(path)
コード例 #7
0
ファイル: text.py プロジェクト: tyarkoni/featureX
    def save(self, path):
        """Write elements to *path*: SRT when the path ends in 'srt',
        otherwise a tab-separated onset/text/duration file."""
        if path.endswith('srt'):
            verify_dependencies(['pysrt'])
            from pysrt import SubRipFile, SubRipItem
            from datetime import time

            subs = SubRipFile()
            for elem in self._elements:
                onset = elem.onset
                subs.append(SubRipItem(0,
                                       time(*self._to_tup(onset)),
                                       time(*self._to_tup(onset + elem.duration)),
                                       elem.text))
            subs.save(path)
        else:
            with open(path, 'w') as outfile:
                outfile.write('onset\ttext\tduration\n')
                for elem in self._elements:
                    row = '{}\t{}\t{}\n'.format(elem.onset, elem.text,
                                                elem.duration)
                    outfile.write(row)
コード例 #8
0
ファイル: subtitle.py プロジェクト: cash2one/AutoSystem
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge SRT subtitles in two different languages into one bilingual track.

    Because the two subtitle files have different timelines, the merged
    output starts a new cue whenever either source file changes, so the two
    languages do not always switch at exactly the same moment; this is
    unavoidable.

    See https://github.com/byroot/pysrt/issues/17

    and https://github.com/byroot/pysrt/issues/15

    :param sub_a: a SubRipFile, e.g. ``SubRipFile.open(sub_a_path, encoding=encoding)``
    :param sub_b: the second-language SubRipFile
    :param delta: minimum cue length; shorter spans between boundaries are dropped
    :param encoding: NOTE(review): accepted but never used in this function
    :return: the merged SubRipFile
    """
    out = SubRipFile()
    # Collect every start/end boundary of both files into one sorted timeline.
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    # j/k remember the last matched index in sub_a/sub_b so find_subtitle
    # does not rescan from the beginning on every span.
    j = k = 0
    for i in xrange(1, len(intervals)):  # xrange: this snippet is Python 2
        start = SubRipTime.from_ordinal(intervals[i - 1])
        end = SubRipTime.from_ordinal(intervals[i])

        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                item = SubRipItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
コード例 #9
0
ファイル: subtitle.py プロジェクト: GoTop/AutoSystem
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """Merge two SRT files (two languages) into one bilingual SubRipFile.

    A new cue is emitted for every span between consecutive timeline
    boundaries of either input, so the two languages may switch at slightly
    different moments.  See pysrt issues #15 and #17.
    """
    # One sorted timeline built from every cue boundary of both inputs.
    boundaries = []
    for subs in (sub_a, sub_b):
        for cue in subs:
            boundaries.append(cue.start.ordinal)
            boundaries.append(cue.end.ordinal)
    boundaries.sort()

    merged = SubRipFile()
    idx_a = idx_b = 0  # resume hints for find_subtitle
    for pos in xrange(1, len(boundaries)):
        span_start = SubRipTime.from_ordinal(boundaries[pos - 1])
        span_end = SubRipTime.from_ordinal(boundaries[pos])

        if (span_end - span_start) > delta:
            text_a, idx_a = find_subtitle(sub_a, span_start, span_end, idx_a)
            text_b, idx_b = find_subtitle(sub_b, span_start, span_end, idx_b)

            combined = join_lines(text_a, text_b)
            if len(combined) > 0:
                merged.append(SubRipItem(0, span_start, span_end, combined))

    merged.clean_indexes()
    return merged
コード例 #10
0
ファイル: mvyskoc_merge.py プロジェクト: EdwardBetts/pysrt
def merge_subtitle(sub_a, sub_b, delta):
    """Interleave two subtitle tracks into one bilingual SubRipFile, cutting
    a new cue at every start/end boundary of either input."""
    points = sorted(
        [c.start.ordinal for c in sub_a] + [c.end.ordinal for c in sub_a] +
        [c.start.ordinal for c in sub_b] + [c.end.ordinal for c in sub_b])

    out = SubRipFile()
    a_hint = b_hint = 0
    # Walk consecutive boundary pairs instead of indexing by position.
    for prev, cur in zip(points, points[1:]):
        span_start = SubRipTime.from_ordinal(prev)
        span_end = SubRipTime.from_ordinal(cur)

        if (span_end - span_start) > delta:
            line_a, a_hint = find_subtitle(sub_a, span_start, span_end, a_hint)
            line_b, b_hint = find_subtitle(sub_b, span_start, span_end, b_hint)

            merged_text = join_lines(line_a, line_b)
            if len(merged_text) > 0:
                out.append(SubRipItem(0, span_start, span_end, merged_text))

    out.clean_indexes()
    return out
コード例 #11
0
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two SubRipFiles into a single bilingual track split at every
    cue boundary of either input; spans shorter than *delta* are dropped."""
    out = SubRipFile()

    timeline = []
    timeline.extend(cue.start.ordinal for cue in sub_a)
    timeline.extend(cue.end.ordinal for cue in sub_a)
    timeline.extend(cue.start.ordinal for cue in sub_b)
    timeline.extend(cue.end.ordinal for cue in sub_b)
    timeline.sort()

    hint_a = hint_b = 0  # where find_subtitle resumes scanning
    for idx in range(1, len(timeline)):
        seg_start = SubRipTime.from_ordinal(timeline[idx - 1])
        seg_end = SubRipTime.from_ordinal(timeline[idx])

        if (seg_end - seg_start) > delta:
            piece_a, hint_a = find_subtitle(sub_a, seg_start, seg_end, hint_a)
            piece_b, hint_b = find_subtitle(sub_b, seg_start, seg_end, hint_b)

            joined = join_lines(piece_a, piece_b)
            if len(joined) > 0:
                out.append(SubRipItem(0, seg_start, seg_end, joined))

    out.clean_indexes()
    return out
コード例 #12
0
                      chat_channel,
                      chat_server[0],
                      chat_server[1],
                      twitchclient_version=twitchclient_version)

# Rolling SRT chat logger: each burst of IRC messages becomes one subtitle
# cue timed from the previous burst's arrival to now.
outsrt = SubRipFile()

text = ''

while 1:
    raw_msg_list = bot.get_message()
    if len(raw_msg_list) > 0:
        if len(text) > 0:
            # Close the previous cue at the current wall-clock time.
            end = SubRipTime.from_time(datetime.now())
            item = SubRipItem(0, start, end, text)
            outsrt.append(item)
        start = SubRipTime.from_time(datetime.now())
        text = ''
        timestamp = get_timestamp(timestamp_format)
        for item in raw_msg_list:
            if record_raw:
                log_add(raw_log_path, timestamp + ' ' + item + '\n')
            username, message = irc_bot.parse_user(item)
            if username != '':
                safe_print(chat_channel + " " + username + ": " + message)
                log_add(log_path,
                        timestamp + ' ' + username + ': ' + message + '\n')
                text += username + ": " + message + '\n'
                # Re-index and rewrite the whole SRT after every message —
                # O(n) per message, acceptable for chat-sized logs.
                outsrt.clean_indexes()
                outsrt.save(srt_log_path, encoding='utf-8')
コード例 #13
0
# Build an SRT file from PGS DisplaySets: image-bearing sets start a cue,
# imageless sets end it.
srt = SubRipFile()

# get all DisplaySets that contain an image
print("Loading DisplaySets...")
allsets = [ds for ds in tqdm(pgs.iter_displaysets())]

print(f"Running OCR on {len(allsets)} DisplaySets and building SRT file...")
subText = ""
subStart = 0
subIndex = 0
for ds in tqdm(allsets):
    if ds.has_image:
        # get Palette Display Segment
        pds = ds.pds[0]
        # get Object Display Segment
        ods = ds.ods[0]

        # img = make_image(ods, pds)
        # subText = pytesseract.image_to_string(img)
        subStart = ods.presentation_timestamp
    else:
        # An imageless DisplaySet marks the end of the current subtitle.
        startTime = SubRipTime(milliseconds=int(subStart))
        endTime = SubRipTime(
            milliseconds=int(ds.end[0].presentation_timestamp))
        # BUG FIX: the original appended the literal string "subText" instead
        # of the OCR result held in the subText variable.
        srt.append(SubRipItem(subIndex, startTime, endTime, subText))
        subIndex += 1

print(f"Done. SRT file saved as {srtFile}")
srt.save(srtFile, encoding='utf-8')
コード例 #14
0
ファイル: sublog_to_srt.py プロジェクト: WNRI/errantia
# Convert a subtitle log ("timestamp,text" lines preceded by a
# '- start <time>' header) into SRT written to stdout.
# NOTE: Python 2 source (`unicode` builtin).
srt = SubRipFile(eol='\n', encoding='utf-8')
i = 1

for line in sublog:
    line = line.split(",", 1)
    if (line[0] and line[0][0] == '-'):
        # Control lines begin with '-'; only '- start <time>' matters and
        # sets the reference time subtracted from every entry.
        if (START_TIME == None and line[0][:8] == '- start '):
            START_TIME = datetime.strptime(line[0], '- start ' + TIMEFORMAT +
            '\n')
        continue

    # Offset of this entry relative to the start marker.
    no = datetime.strptime(line[0], TIMEFORMAT) - START_TIME
    if (abs(no) > timedelta(1)):
        # SRT timestamps cannot span more than a day.
        print("\nCan't go over a day in a subtitle! Delete non-used lines in" + \
                " log.\nLet there only be one '- start' line at the top of" + \
                " the log-file.")
        sys.exit(1)

    # SubRipTime ordinals are milliseconds.
    time = SubRipTime.from_ordinal(no.seconds*1000 + no.microseconds*0.001)

    # Each cue is displayed for 30 seconds (30*1000 ms).
    item = SubRipItem(i, start=time, end=time + 30*1000,
            text=unicode(line[1], 'utf-8'))
    srt.append(item)
    i += 1

srt.clean_indexes()
#srt.save(path=sys.stdout)

for line in srt:
    sys.stdout.write(unicode(line).encode('utf-8'))
コード例 #15
0
def handle_tracks(tracks, start, end, fps, srt_filename):
    global XML_FILENAME, HUE_SAMPLING, DMX_SAMPLING, TRANSITION_TIME, DEBUG, VERBOSE
    track_list = []
    for track in tracks:
        track_list = handle_track_list(track, start, end, fps)
        # print(track_list[3][0])
        # try:
        #     print(len(track_list[3]),len(track_list[3][0]),track_list[3][0][1:10],track_list[3][-1][1:10])
        # except:
        #     pass

    # srt_file = open(srt_filename,"w")

    dmx_frame = zeros(512)
    prev_dmx_frame = zeros(512)
    prev_dmx_valid_frame = zeros(512)

    subrip_file = SubRipFile(path=srt_filename)

    print(40 * "-")
    print("Processing frames")
    print(40 * "-")
    # print(track_list[3][1])
    # print(len(track_list[1]))

    if len(track_list[1]) > 0:
        # If there isn't only an audio track
        # print(track_list[1][0])
        # print(track_list[1][0]!="audio")
        # print(len(track_list[1]) != 1 and track_list[1][0]!="audio")
        if (len(track_list[1]) != 1 or track_list[1][0] != "audio"):
            print("Number of lighting events: ", len(track_list[3][0]))
            frame_no = 0
            for i in range(len(track_list[3][0])):
                # frame_no = track_list[4][i]
                frame_no = i
                t = i * (1.0 / float(fps))
                if VERBOSE:
                    print(40 * "-")
                    # print(frame_no,fps)
                    print("Frame %s / time %s seconds" % (frame_no, t))
                    print(40 * "-")
                hue_cmd = ""
                dmx_cmd = ""
                # for the bug, len(of track_list[0]) is greater than
                # len(track_list[3])
                for j in range(len(track_list[0])):
                    # print(track_list[1][j])
                    if track_list[1][j] != "audio":
                        name = track_list[0][j]
                        type = track_list[1][j]
                        addr = track_list[2][j]
                        # print(name,type,addr)
                        # TODO: if frame_no = i as on line 181, the following line fails!
                        # [3][j] is out of range therefore j is the problem
                        try:
                            payload = track_list[3][j][i]
                        except Exception as e:
                            print(
                                'ERROR: could not get payload, len(of track_list[0]) is likely greater than \
                            len (track_list[3])')
                        # print(name, type, addr, payload)
                        # Convert Hue payload to hue command
                        if payload != "":
                            if addr[1:4].lower(
                            ) == "hue" and type == "OSCColor/floatarray":
                                if VERBOSE:
                                    print("hue", addr, payload)
                                r, g, b, a = 0, 0, 0, 0
                                try:
                                    payload_list = payload.split(",")
                                    # print(payload_list)
                                    if len(payload_list) == 3:
                                        r, g, b = payload_list
                                    elif len(payload_list) == 4:
                                        r, g, b, a = payload_list
                                except Exception as e:
                                    print(e)

                                h, s, v = rgb_to_hsv(float(r), float(g),
                                                     float(b))

                                h *= 65535.0
                                s *= 254.0
                                v *= 254.0

                                h = int(h)
                                s = int(s)
                                v = int(v)
                                # print("hue", addr, payload, h,s,v)
                                n = int(addr[4:])
                                # print("hue", n, h,s,v)
                                if len(hue_cmd) == 0:
                                    hue_cmd += "HUE%s(%s,%s,%s,%s)" % (
                                        n, h, s, v, TRANSITION_TIME)
                                else:
                                    hue_cmd += ";HUE%s(%s,%s,%s,%s)" % (
                                        n, h, s, v, TRANSITION_TIME)
                            # Convert single DMX channel to command
                            elif addr[1:4].lower(
                            ) == "dmx" and type == "OSCValue/float":
                                if VERBOSE:
                                    print("dmx value", addr, payload)
                                n = int(addr[4:])
                                if payload != "":
                                    dmx_frame[int(n)] = int(
                                        float(payload) * 254)
                            # Convert multiple DMX channels to command
                            elif addr[1:4].lower() == "dmx" and (
                                    type == "OSCColor/floatarray"
                                    or type == "OSCValue/standard"):
                                if VERBOSE:
                                    print("dmx colour", addr, payload)
                                n = int(addr[4:])
                                if payload != "":
                                    payload_list = payload.split(",")
                                    for channel in payload_list:
                                        dmx_frame[int(n)] = int(
                                            float(channel) * 254)
                                        n += 1

                # Output HUE commands
                # hue_t = frame_no * (1.0/HUE_SAMPLING)
                if frame_no % fps == 0 and hue_cmd != "":
                    item = SubRipItem(frame_no, text=hue_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1)
                    if VERBOSE:
                        print(item)
                    else:
                        print("h", end="")
                        stdout.flush()
                    subrip_file.append(item)
                    frame_no += 1

                # Output DMX command
                dmx_frame_trimmed = trim_zeros(dmx_frame, 'b').astype('uint8')

                # print("dmx_frame_trimmed before",dmx_frame_trimmed)

                # if len(dmx_frame_trimmed)==0:
                #     dmx_frame_trimmed = zeros(512)

                # print("dmx_frame_trimmed after",dmx_frame_trimmed)

                dmx_cmd = "DMX1" + str(tuple(dmx_frame_trimmed)[1:]).replace(
                    " ", "")

                if VERBOSE:
                    print('dmx_cmd to be written: ', dmx_cmd)

                # cmd = hue_cmd + ";" + dmx_cmd
                if (not array_equal(dmx_frame_trimmed,
                                    prev_dmx_frame)) or (frame_no % fps == 0):
                    # if frame_no % fps == 0 and dmx_cmd=="":
                    # if frame_no % fps == 0:
                    #     print(dmx_cmd, prev_dmx_frame)

                    # Fix for and empty DMX command
                    # Usually found at the start of a treatment track
                    if dmx_cmd == "DMX1()":
                        item = dmx_cmd = "DMX1" + str(
                            tuple(zeros(512, dtype=int))[1:]).replace(" ", "")

                    item = SubRipItem(frame_no, text=dmx_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1.0 / fps)

                    if VERBOSE:
                        print(item)
                    else:
                        print("d", end="")
                        stdout.flush()

                    subrip_file.append(item)
                    frame_no += 1
                prev_dmx_frame = dmx_frame_trimmed
                # print(cmd)
                if VERBOSE:
                    print(40 * "-")
                    # print(track_list[0][j], track_list[1][j], track_list[2][j], track_list[3][j][i])
                    # print(frame)
                    # j = 1
                    # for frame in track:
                    #     print(track_list[0][i] + " " +frame, end = " ")
                    #     j += 1
                    # print()
    encoding = "utf_8"
    subrip_file.save(srt_filename, encoding=encoding)
    print()
コード例 #16
0
# Twitch chat logger (tab-indented variant): each burst of IRC messages
# becomes one SRT cue timed from the previous burst's arrival to now.
srt_log_path = current_directory + '/comment_log/' + chat_channel + '.srt'

bot = irc_bot.irc_bot(username, oauth, chat_channel, chat_server[0], chat_server[1], twitchclient_version = twitchclient_version)

outsrt = SubRipFile()

text = ''

while 1:
	raw_msg_list = bot.get_message()
	if len(raw_msg_list) > 0:
		if len(text) > 0:
			# Close the previous cue at the current wall-clock time.
			end = SubRipTime.from_time(datetime.now())
			item = SubRipItem(0, start, end, text)
			outsrt.append(item)
		start = SubRipTime.from_time(datetime.now())
		text = ''
		timestamp = get_timestamp(timestamp_format)
		for item in raw_msg_list:
			if record_raw:
				log_add(raw_log_path, timestamp + ' ' + item + '\n')
			username, message = irc_bot.parse_user(item)
			if username != '':
				safe_print(chat_channel + " " + username + ": " + message)
				log_add(log_path, timestamp + ' ' + username + ': ' + message + '\n')
				text += username + ": " + message + '\n'
				# Re-index and rewrite the whole SRT after every message.
				outsrt.clean_indexes()
				outsrt.save(srt_log_path, encoding='utf-8')
コード例 #17
0
def syncSrts(subs_L1, subs_L2):
    """Sync subs_L1 by subs_L2 timings and return a SubRipFile.

    Returns (out, dupes, fixed, subs_L2_out): the re-timed L1 subtitles, the
    count of L1 cues matched by two consecutive L2 cues, the count of
    duplicates repaired by trySplitLine, and a copy of L2 padded so both
    outputs align one-to-one.
    """

    out = SubRipFile()
    subs_L2_out = SubRipFile()

    j = 0
    last_j = -1   # index of the previously matched L1 cue
    dupes = 0     # L1 cues matched twice in a row
    L2_ind = -1

    for L2_sub in subs_L2:
        L2_ind = L2_ind + 1
        start = L2_sub.start
        end = L2_sub.end
        j = matchSubtitle(subs_L1, start, end, max(last_j, 0))
        L1_sub = subs_L1[j] if (j > -1) else None

        if L1_sub is None:
            # No matching L1 line: keep the L2 text so timing stays aligned.
            text = L2_sub.text
            print("---- Missing: {}: {}".format(
                L2_sub.index, L2_sub.text.replace("\n", "[[NL]]")))
        else:
            text = L1_sub.text

            if j - 1 > last_j and last_j > -1:
                # we skipped a sub in L1_subs
                if isSubMatch(subs_L1[j - 1], subs_L2[L2_ind - 1].start,
                              subs_L2[L2_ind - 1].end):
                    # Skipped line belongs to the previous L2 cue: append it.
                    out[len(out) -
                        1].text = out[len(out) -
                                      1].text + "\n" + subs_L1[j - 1].text
                elif isSubMatch(subs_L1[j - 1], start, end):
                    # Skipped line belongs to the current L2 cue: prepend it.
                    text = subs_L1[j - 1].text + "\n" + text
                else:
                    # A sub line in L1 does not match any in L2
                    # We add it to synced L1, and add an empty one to subs L2
                    item = SubRipItem(0, subs_L1[j - 1].start,
                                      subs_L1[j - 1].end, subs_L1[j - 1].text)
                    out.append(item)
                    item2 = SubRipItem(0, subs_L1[j - 1].start,
                                       subs_L1[j - 1].end, " ")
                    subs_L2_out.append(item2)

            if j == last_j:
                dupes = dupes + 1
                #print("---- OOPS. {}: {} - {}".format(L2_sub.index, L2_sub.text.replace("\n",""), L1_sub.text.replace("\n","")))
            last_j = j

        item = SubRipItem(0, start, end, text)
        out.append(item)

        item2 = SubRipItem(0, start, end, L2_sub.text)
        subs_L2_out.append(item2)

    out.clean_indexes()
    subs_L2_out.clean_indexes()

    # Second pass: when the same L1 text landed on two consecutive cues whose
    # L2 texts differ, try to split the line between them.
    fixed = 0
    for i in range(1, len(out)):
        sub1 = out[i - 1].text
        sub2 = out[i].text
        if ((sub1 == sub2)
                and (subs_L2_out[i - 1].text != subs_L2_out[i].text)):
            if (trySplitLine(out, i, sub1)):
                fixed = fixed + 1
                # NOTE(review): this increment has no effect — `i` is
                # reassigned by the for loop on the next iteration.
                i = i + 1
            else:
                print("---- Oy. {}: {} not fixed".format(
                    i, sub1.replace("\n", "[[NL]]")))

    return out, dupes, fixed, subs_L2_out