Exemple #1
0
def pysubs2_ssa_event_add(
    src_ssafile,
    dst_ssafile,
    text_list,
    style_name,
):
    """
    Append new events to ``dst_ssafile.events`` built from regions and/or text.

    Args:
        src_ssafile: optional object whose ``events`` supply start/end
            times. A falsy value (e.g. ``None``) means "no source".
        dst_ssafile: object whose ``events`` list receives the new events.
        text_list: one of
            ``[((start, end), text), ...]`` (no source: regions and text),
            ``[(start, end), ...]`` (no source: regions only, ``start``
            must be an ``int``), or
            ``[text, ...]`` (paired by index with ``src_ssafile.events``).
            When empty or ``None``, only the timings of ``src_ssafile``
            are copied.
        style_name: style given to every new event; a falsy value selects
            ``'Default'``.

    Raises:
        IndexError: if ``text_list`` is longer than ``src_ssafile.events``.

    Without a source, a ``text_list`` whose first item starts with neither a
    tuple nor an int adds nothing.
    """
    if not style_name:
        style_name = 'Default'

    def append_event(start, end):
        # Build the event, attach it to the destination and hand it back so
        # the caller can set the text when there is one.
        event = pysubs2.SSAEvent()
        event.start = start
        event.end = end
        event.style = style_name
        dst_ssafile.events.append(event)
        return event

    if not text_list:
        if not src_ssafile:
            # Neither text nor source regions: nothing to add. This path
            # used to fail with AttributeError on ``None.events``.
            return
        # src_ssafile provides regions only. Iterate over a snapshot so the
        # loop stays bounded even if dst_ssafile shares the same list.
        for src_event in list(src_ssafile.events):
            append_event(src_event.start, src_event.end)
        return

    if src_ssafile:
        # src_ssafile provides regions, text_list is [text, text, ...]
        for i, text in enumerate(text_list):
            src_event = src_ssafile.events[i]
            append_event(src_event.start, src_event.end).text = text
        return

    first_item = text_list[0][0]
    if isinstance(first_item, tuple):
        # text_list is [((start, end), text), ...]
        for (start, end), text in text_list:
            append_event(start, end).text = text
    elif isinstance(first_item, int):
        # text_list is [(start, end), ...]: regions only
        for start, end in text_list:
            append_event(start, end)
    def _create_bullets(self, content, animation):
        """Add vocabulary information into the subtitle object.

        Registers a "Bullet" style derived from "Default", replaces the
        ``\\N`` line breaks of the existing events with spaces, and appends
        one "Bullet" event per entry of ``content``.

        Args:
          content (list): vocabulary information with time stamps. Each
            entry is indexed with "words", "start" and "end"; each word is
            indexed with "word", "meaning" and "dict_pos".
          animation (bool): whether to use the "Scroll up" effect in ass
        """
        style = self._subs.styles["Default"].copy()
        style.alignment = 7
        style.fontsize = 13
        style.borderstyle = 1
        style.shadow = 0.5  # shadow: 0.5 px
        style.backcolor = pysubs2.Color(0, 0, 0, 100)  # shadow color: black
        style.outline = 0.5  # font outline: 0.5 px
        style.outlinecolor = pysubs2.Color(0, 0, 0, 20)  # outline color: black
        style.marginl = 70
        style.marginv = 30
        style.primarycolor = pysubs2.Color(255, 255, 255, 0)  # font color: white
        self._subs.styles["Bullet"] = style

        for s in self._subs:
            s.text = s.text.replace("\\N", " ")

        for bullet in content:
            # One line per word; the three columns are separated by four
            # hard spaces and each carries its own colour override.
            ws = "\\N".join(
                "\\h\\h\\h\\h".join([
                    # Backslash escaped explicitly: "\c" is an invalid
                    # escape sequence and triggers a warning on recent
                    # Python versions.
                    "{\\c&H58E08F&}" + w["word"],
                    "{\\c&HFFFFFF&}" + w["meaning"],
                    "{\\c&H2AD6C4&}" + "[" + w["dict_pos"] + "]"
                ]) for w in bullet["words"]
            )
            start = pysubs2.time.timestamp_to_ms(
                pysubs2.time.TIMESTAMP.match(bullet["start"]).groups())
            end = pysubs2.time.timestamp_to_ms(
                pysubs2.time.TIMESTAMP.match(bullet["end"]).groups())

            # The scroll parameter is derived from the event duration, so a
            # zero-length event cannot be animated (it would divide by zero).
            if animation and end != start:
                event = pysubs2.SSAEvent(start=start,
                                         end=end,
                                         text=ws,
                                         style="Bullet",
                                         effect="Scroll up;10;110;" +
                                         str(100000 / (0.90 * (end - start))))
            else:
                event = pysubs2.SSAEvent(start=start,
                                         end=end,
                                         text=ws,
                                         style="Bullet")
            self._subs.append(event)
Exemple #3
0
 def man_get_vtt_words_index(self):
     """
     Build events from a text file that the user adjusts by hand.

     The current text is written next to ``self.path`` with a "txt"
     extension, the user is prompted to press Enter once the file has been
     edited, and the file is then read back line by line. Every word read
     is stored on the next entry of ``self.vtt_words``; each line produces
     one event and one entry in ``self.vtt_words_index``. The text file is
     deleted afterwards and the list of events is returned.
     """
     line_events = []
     txt_path = self.path[:-3] + "txt"
     txt_path = str_to_file(
         str_=self.to_text_str(),
         output=txt_path,
         input_m=input)
     input(_("Wait for the events manual adjustment. "
             "Press Enter to continue."))
     with open(txt_path, encoding=constants.DEFAULT_ENCODING) as text_file:
         word_pos = 0
         for text_line in text_file:
             line_words = text_line.split()
             # The event starts where the first not-yet-consumed word starts.
             line_event = pysubs2.SSAEvent(
                 start=self.vtt_words[word_pos].start)
             for new_word in line_words:
                 self.vtt_words[word_pos].word = new_word
                 word_pos += 1
             # Record the running word count reached after this line.
             self.vtt_words_index.append(word_pos)
             if word_pos:
                 # The event ends with the last word consumed so far.
                 line_event.end = self.vtt_words[word_pos - 1].end
             line_events.append(line_event)
     constants.delete_path(txt_path)
     return line_events
Exemple #4
0
 def add(self, begin, end, text):
     """Parse ``text`` into a Dialogue event and insert it via bisect.

     ``begin`` and ``end`` are multiplied by 1000.0 to form the event's
     start and end. The fields returned by ``parseLine`` are passed to the
     event as keyword arguments.
     """
     fields = parseLine(text)
     dialogue = pysubs2.SSAEvent(
         type='Dialogue',
         start=begin * 1000.0,
         end=end * 1000.0,
         **fields)
     position = bisect.bisect_left(self, dialogue)
     self.insert(position, dialogue)
Exemple #5
0
 def add(self, begin, end, text):
     """Store a header block, or insert ``text`` as a Dialogue event.

     Text starting with '[Script Info]' is handed to ``setHeader``. Any
     other text is parsed with ``parseLine`` and inserted at its bisect
     position, with ``begin``/``end`` multiplied by 1000.0 as start/end.
     """
     if text.startswith('[Script Info]'):
         self.setHeader(text)
         return

     fields = parseLine(text)
     dialogue = pysubs2.SSAEvent(
         type='Dialogue',
         start=begin * 1000.0,
         end=end * 1000.0,
         **fields)
     position = bisect.bisect_left(self, dialogue)
     self.insert(position, dialogue)
def convert_yt_comments(jsonname, comment_duration, video_info, outputname):
    """Convert a JSON list of timed comments into right-to-left moving subtitles.

    Args:
        jsonname: path of a JSON file holding a list of comments; each
            comment is indexed with "time_in_seconds" and "message".
        comment_duration: value added to a comment's start (in ms) to get
            its end; also used as the end time of the ``\\move`` tag.
        video_info: mapping indexed with "duration" (compared against
            comment times after multiplying by 1000).
        outputname: path handed to ``SSAFile.save``.

    Returns:
        None. Nothing is written when the JSON file holds no comments.
    """
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(jsonname, encoding="utf-8") as f:
        yt_comments = json.load(f)

    if not yt_comments:
        return

    play_res_x = 384
    play_res_y = 288
    comment_size = 20  # height of one comment row
    off_screen = 30  # how far outside the frame a comment starts and ends

    subs = pysubs2.SSAFile()
    subs.info["PlayResX"] = play_res_x
    subs.info["PlayResY"] = play_res_y

    start_time_shift = yt_comments[0]["time_in_seconds"] * 1000

    # One slot per row; each slot remembers the last comment placed there.
    comment_channel = [None] * len(range(0, play_res_y, comment_size))

    for msg in yt_comments:
        now = msg["time_in_seconds"] * 1000
        if now > video_info["duration"] * 1000:
            continue

        if not msg["message"]:
            continue

        # Take the first row that is empty, or whose occupant's timestamp
        # plus 200 ms per character of the *new* message lies in the past.
        # When every row is busy the comment falls back to row 1.
        selected_channel = 1
        for index, chan in enumerate(comment_channel):
            if (not chan or chan["time_in_seconds"] * 1000 +
                    (200 * len(msg["message"])) < now):
                comment_channel[index] = msg
                selected_channel = index + 1
                break

        y_pos = selected_channel * comment_size
        # Backslash escaped explicitly: "\m" is an invalid escape sequence.
        movement = "{{\\move({x_from},{y},{x_to},{y},0,{duration})}}".format(
            x_from=play_res_x + off_screen,
            x_to=-off_screen,
            y=y_pos,
            duration=comment_duration)

        subs.append(
            pysubs2.SSAEvent(
                start=pysubs2.make_time(ms=now),
                end=pysubs2.make_time(ms=now + comment_duration),
                text=movement + msg["message"]))

    # Re-base all events so the first comment appears 100 ms into the file.
    subs.shift(ms=-start_time_shift + 100)
    subs.save(outputname)
Exemple #7
0
def add_credits(subs):
    """Add credits to the software at the end of the subtitle SRT file.

    The credits event starts 2000 ms after the end of the last event in
    ``subs`` and lasts 2000 ms. When ``subs`` holds no event, the credits
    start at 2000 ms.

    Parameters
    ----------
        subs : `pysubs2.ssafile.SSAFile`, required
            subtitle object with all the subtitle events

    Returns
    -------
        `pysubs2.ssafile.SSAFile` : subtitle object with credits appended at the end
    """

    # An empty subtitle object has no last event; indexing it would raise
    # IndexError, so fall back to 0 as the reference end time.
    last_end = int(subs[-1].end) if len(subs) else 0
    start = last_end + 2000
    end = start + 2000
    text = "Processed with <i>ReSuber</i>.\nCheck the github page <font color=\"blue\"> https://github.com/polak0v/ReSuber </font> !"
    event = pysubs2.SSAEvent(start=start, end=end, text=text)
    subs += [event]

    return subs
def make_ass(wav, segments, transcriptions, utt2spk, ass):
    """
    Build an .ASS file from transcriptions.

    Arguments:
       wav: name of the audio file
       segments: path to the segments description file
       transcriptions: path to the transcription file
       utt2spk: path to the file mapping segments to speakers
       ass: path to the .ASS subtitle file
    """
    def read_table(path, sep, names):
        # All three inputs are header-less delimited text files.
        return pd.read_csv(path, header=None, sep=sep, names=names)

    sub = pysubs2.SSAFile()
    sub.info['Title'] = 'Default Aegisub file'
    sub.info['YCbCr Matrix'] = 'None'
    sub.aegisub_project['Audio File'] = wav
    sub.aegisub_project['Scroll Position'] = 0
    sub.aegisub_project['Active Line'] = 0

    segments_df = read_table(segments, ' ',
                             ['utt_id', 'wav', 'start', 'end'])
    transcriptions_df = read_table(transcriptions, '\t', ['utt_id', 'text'])
    utt2spk_df = read_table(utt2spk, '\t', ['utt_id', 'speaker'])

    # Left joins keep every segment; missing text/speaker become ''.
    with_text = segments_df.merge(transcriptions_df, how='left', on='utt_id')
    events = with_text.merge(utt2spk_df, how='left', on='utt_id').fillna('')

    # Row layout after the merges: utt_id, wav, start, end, text, speaker.
    for _utt_id, _wav, seg_start, seg_end, text, speaker in events.values:
        sub.events.append(
            pysubs2.SSAEvent(start=pysubs2.make_time(s=float(seg_start)),
                             end=pysubs2.make_time(s=float(seg_end)),
                             text=text,
                             name=speaker))
    sub.sort()
    sub.save(ass, format_='ass')
Exemple #9
0
def auditok_gen_speech_regions(  # pylint: disable=too-many-arguments
        audio_wav,
        energy_threshold=constants.DEFAULT_ENERGY_THRESHOLD,
        min_region_size=constants.DEFAULT_MIN_REGION_SIZE,
        max_region_size=constants.DEFAULT_MAX_REGION_SIZE,
        max_continuous_silence=constants.DEFAULT_CONTINUOUS_SILENCE,
        mode=auditok.StreamTokenizer.STRICT_MIN_LENGTH,
        is_ssa_event=False):
    """
    Tokenize an audio file with auditok and return its speech regions.

    Args:
        audio_wav: file name handed to ``auditok.ADSFactory.ads``.
        energy_threshold: threshold for the ``AudioEnergyValidator``.
        min_region_size, max_region_size, max_continuous_silence: limits
            passed to the tokenizer after multiplying by 100
            (presumably seconds converted to 10 ms analysis windows --
            TODO confirm against the audio source's block size).
        mode: ``auditok.StreamTokenizer`` mode.
        is_ssa_event: when true, return ``pysubs2.SSAEvent`` objects instead
            of ``(start, end)`` tuples.

    Returns:
        A list of regions. Start and end are the tokenizer's positions
        multiplied by 10.
    """
    asource = auditok.ADSFactory.ads(
        filename=audio_wav, record=True)
    validator = auditok.AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=energy_threshold)
    asource.open()
    # Make sure the audio source is released even if tokenization fails.
    try:
        tokenizer = auditok.StreamTokenizer(
            validator=validator,
            min_length=int(min_region_size * 100),
            max_length=int(max_region_size * 100),
            max_continuous_silence=int(max_continuous_silence * 100),
            mode=mode)

        # auditok.StreamTokenizer.DROP_TRAILING_SILENCE
        tokens = tokenizer.tokenize(asource)
        # token[1] and token[2] are the start and end positions of a token
        if is_ssa_event:
            regions = [
                pysubs2.SSAEvent(start=token[1] * 10, end=token[2] * 10)
                for token in tokens
            ]
        else:
            regions = [(token[1] * 10, token[2] * 10) for token in tokens]
    finally:
        asource.close()
    # reference
    # auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data
    return regions
Exemple #10
0
def auditok_gen_stats_regions(
        auditok_stats,
        asource
):
    """
    Tokenize ``asource`` and record the regions and their statistics.

    Args:
        auditok_stats: object providing ``energy_t``, ``mnrs``, ``mxrs``,
            ``mxcs`` and ``mode`` for the tokenizer. Its ``events``,
            ``small_region_count``, ``big_region_count`` and
            ``delta_region_size`` are updated in place.
        asource: audio source. It is opened here and left open for the
            caller.

    Returns:
        The same ``auditok_stats`` object.

    ``delta_region_size`` is the distance between the average region size
    and half of the maximum region size. It is left untouched when
    ``auditok_stats.events`` is empty, because no average exists then
    (this used to raise ZeroDivisionError).
    """
    validator = auditok.AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=auditok_stats.energy_t)
    asource.open()
    tokenizer = auditok.StreamTokenizer(
        validator=validator,
        min_length=int(auditok_stats.mnrs * 100),
        max_length=int(auditok_stats.mxrs * 100),
        max_continuous_silence=int(auditok_stats.mxcs * 100),
        mode=auditok_stats.mode)

    # auditok.StreamTokenizer.DROP_TRAILING_SILENCE
    tokens = tokenizer.tokenize(asource)
    max_region_size = int(auditok_stats.mxrs * 1000)
    small_region_size = max_region_size >> 3  # 1/8 of the maximum
    big_region_size = max_region_size - (max_region_size >> 2)  # 3/4 of it
    total_region_size = 0
    for token in tokens:
        # token[1] and token[2] are the start and end positions of a token
        auditok_stats.events.append(pysubs2.SSAEvent(
            start=token[1] * 10,
            end=token[2] * 10))
        dura = (token[2] - token[1]) * 10
        total_region_size += dura
        if dura <= small_region_size:
            auditok_stats.small_region_count += 1
        elif dura >= big_region_size:
            auditok_stats.big_region_count += 1

    if auditok_stats.events:
        average_region_size = total_region_size / len(auditok_stats.events)
        auditok_stats.delta_region_size = abs(
            average_region_size - (max_region_size >> 1))
    # reference
    # auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data
    return auditok_stats
Exemple #11
0
        }
    })

# Log the collected per-style information
logger.debug(collection)

# The ass file object that receives one style and one event per entry
output_ass = pysubs2.SSAFile()

# TODO: clearing the pre-defined styles does not work yet, so the call
# stays disabled until the reason is found.
# output_ass.clear()

# Register every collected style together with an event carrying its text
for details in collection:
    style_info = collection[details]
    style = pysubs2.SSAStyle(fontname=style_info['fontname'],
                             bold=style_info['bold'],
                             italic=style_info['italic'])
    event = pysubs2.SSAEvent(text=style_info['characters'], style=details)

    output_ass.styles[details] = style
    output_ass.append(event)

# Write both ass files; "utf-8-sig" is the encoding passed to save()
output_ass.save('output.ass', encoding='utf-8-sig')
unparsed_ass.save('unparsed_tags.ass', encoding='utf-8-sig')

Exemple #12
0
def pysubs2_ssa_event_add(  # pylint: disable=too-many-branches, too-many-statements
    src_ssafile,
    dst_ssafile,
    text_list,
    style_name,
    same_event_type=0,
):
    """
    Append new events to ``dst_ssafile.events`` built from regions and/or text.

    Args:
        src_ssafile: optional object whose ``events`` supply start/end
            times (and, for modes 1 and 2, text and style). A falsy value
            (e.g. ``None``) means "no source".
        dst_ssafile: object whose ``events`` list receives the new events.
        text_list: one of
            ``[((start, end), text), ...]`` (no source: regions and text),
            ``[(start, end), ...]`` (no source: regions only, ``start``
            must be an ``int``), or
            ``[text, ...]`` (paired by index with ``src_ssafile.events``).
            When empty or ``None``, only the timings of ``src_ssafile``
            are copied.
        style_name: style given to every new event; a falsy value selects
            ``'Default'``.
        same_event_type: how ``[text, ...]`` is combined with the source:
            0 uses the new text alone, 1 puts the new text before the
            source text, 2 puts it after. Any other value adds nothing.

    Raises:
        IndexError: if ``text_list`` is longer than ``src_ssafile.events``.

    In modes 1 and 2 the two texts are joined with ``\\N``. When the style of
    the first source event differs from ``style_name``, a ``\\r`` style
    override is inserted before the second text.
    """
    if not style_name:
        style_name = 'Default'

    def append_event(start, end):
        # Build the event, attach it to the destination and hand it back so
        # the caller can set the text when there is one.
        event = pysubs2.SSAEvent()
        event.start = start
        event.end = end
        event.style = style_name
        dst_ssafile.events.append(event)
        return event

    if not text_list:
        if not src_ssafile:
            # Neither text nor source regions: nothing to add. This path
            # used to fail with AttributeError on ``None.events``.
            return
        # src_ssafile provides regions only. Iterate over a snapshot so the
        # loop stays bounded even if dst_ssafile shares the same list.
        for src_event in list(src_ssafile.events):
            append_event(src_event.start, src_event.end)
        return

    if not src_ssafile:
        first_item = text_list[0][0]
        if isinstance(first_item, tuple):
            # text_list is [((start, end), text), ...]
            for (start, end), text in text_list:
                append_event(start, end).text = text
        elif isinstance(first_item, int):
            # text_list is [(start, end), ...]: regions only
            for start, end in text_list:
                append_event(start, end)
        return

    # From here on: src_ssafile provides regions, text_list is [text, ...]
    if same_event_type not in (0, 1, 2):
        return

    # The style of the first source event decides for the whole file whether
    # a style override has to be inserted between the two texts.
    same_style = (same_event_type != 0
                  and src_ssafile.events[0].style == style_name)

    def merged_text(src_event, text):
        # Combine the new text with the source event's text per mode.
        if same_event_type == 0:
            return text
        if same_event_type == 1:
            # new text first, then the source text in its own style
            if same_style:
                return text + "\\N" + src_event.text
            return text + \
                "\\N{{\\r{style_name}}}".format(
                    style_name=src_event.style) + \
                src_event.text
        # same_event_type == 2: source text first, then the new text
        if same_style:
            return src_event.text + "\\N" + text
        # NOTE(review): the event itself is also given ``style_name``, so
        # this override resets to the style already in effect and the
        # source text is rendered in ``style_name`` too -- confirm whether
        # the event should carry the source style instead.
        return src_event.text + \
            "\\N{{\\r{style_name}}}".format(
                style_name=style_name) + \
            text

    for i, text in enumerate(text_list):
        src_event = src_ssafile.events[i]
        append_event(src_event.start, src_event.end).text = \
            merged_text(src_event, text)
Exemple #13
0
                styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[3:5],
                16)
            b = int(
                styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[5:7],
                16)
            a = 0
        styledict[stylename] = (pysubs2.SSAStyle(
            primarycolor=pysubs2.Color(r=r, g=g, b=b, a=a)))

# Turn every caption into one event per text-carrying node: the caption
# itself when it has no children, otherwise each of its children. All events
# of one caption share the caption's begin/end times.
for line in captions:
    start = pysubs2.time.timestamp_to_ms(
        pysubs2.time.TIMESTAMP.match(line.get('begin')).groups())
    end = pysubs2.time.timestamp_to_ms(
        pysubs2.time.TIMESTAMP.match(line.get('end')).groups())
    children = list(line)
    if children:
        text_nodes = children
    else:
        print('------')
        text_nodes = [line]
    for node in text_nodes:
        text = node.text
        style = node.get('style')
        sublist.append(
            pysubs2.SSAEvent(start=start, end=end, text=text, style=style))

# Attach the collected events and styles, then write "<ttmlname>.ass"
towritesubs.events = sublist
towritesubs.styles = styledict

towritesubs.save(ttmlname + '.ass')
                      twitchclient_version=twitchclient_version)

subs = pysubs2.SSAFile()
i = 0

text = ''

# Reference point for subtitle times: events are timed relative to the
# moment logging starts.
session_started = datetime.now()


def _elapsed_ms():
    """Return the milliseconds elapsed since ``session_started``.

    ``datetime.now().microsecond`` is only the sub-second component of the
    wall clock (0..999999), so it cannot be used as a timestamp.
    """
    return (datetime.now() - session_started).total_seconds() * 1000


while 1:
    raw_msg_list = bot.get_message()
    if raw_msg_list:
        # A new batch arrived: close the previous batch's event, which was
        # on screen from its arrival until now.
        if text:
            end = pysubs2.time.make_time(ms=_elapsed_ms())
            subs.insert(
                i,
                pysubs2.SSAEvent(start=start,
                                 end=end,
                                 text=text.replace('\\', '\\\\')))
            i = i + 1
        start = pysubs2.time.make_time(ms=_elapsed_ms())
        text = ''
        timestamp = get_timestamp(timestamp_format)
        for item in raw_msg_list:
            if record_raw:
                log_add(raw_log_path, timestamp + ' ' + item + '\n')
            username, message = irc_bot.parse_user(item)
            if username != '':
                safe_print(chat_channel + " " + username + ": " + message)
                log_add(log_path,
                        timestamp + ' ' + username + ': ' + message + '\n')
                text += username + ": " + message + '\n'
                subs.save(path=subs_log_path, encoding='utf-8')
Exemple #15
0
    def _create_bullets(self, content, animation):
        """Rebuild the subtitle events with phrase and verb overlays.

        Restyles "Default" in place, registers the "Phrase", "Verb" and "CN"
        styles derived from it, then replaces ``self._subs.events`` with
        three events per entry of ``content``: the marked-up text
        ("Default"), the noun phrases ("Phrase") and the verbs ("Verb").
        Events of ``self._cn_subs``, if any, are appended with style "CN".

        Args:
          content (list): phrase information with time stamps. Each entry
            is indexed with "noun_phrases", "verbs", "markers", "start" and
            "end".
          animation (bool): whether to use the "Scroll up" effect in ass
        """

        default_style = self._subs.styles["Default"]
        default_style.fontsize = 20
        default_style.shadow = 0.3  # shadow: 0.3 px
        default_style.outline = 0.3  # font outline: 0.3 px
        default_style.italic = -1
        default_style.bold = -1
        default_style.marginl = 10
        default_style.marginr = 10
        default_style.marginv = 30 if self._cn_subs else 10

        def make_overlay_style(alignment, marginv):
            # The phrase and verb overlays differ only in their alignment
            # and vertical margin.
            style = self._subs.styles["Default"].copy()
            style.italic = 0
            style.bold = 0
            style.alignment = alignment
            style.fontsize = 24
            # Was misspelled "borderphrase_style"/"borderverb_style", which
            # only created an unused attribute.
            style.borderstyle = 1
            style.shadow = 0.3  # shadow: 0.3 px
            style.backcolor = pysubs2.Color(0, 0, 0, 100)  # shadow: black
            style.outline = 0.3  # font outline: 0.3 px
            style.outlinecolor = pysubs2.Color(0, 0, 0, 20)  # outline: black
            style.marginl = 24
            style.marginr = 10
            style.marginv = marginv
            style.primarycolor = pysubs2.Color(255, 255, 255, 0)  # font: white
            return style

        self._subs.styles["Phrase"] = make_overlay_style(alignment=4,
                                                         marginv=10)
        self._subs.styles["Verb"] = make_overlay_style(alignment=7,
                                                       marginv=44)

        cn_default_style = self._subs.styles["Default"].copy()
        cn_default_style.fontsize = 20
        cn_default_style.shadow = 0.1  # shadow: 0.1 px
        cn_default_style.outline = 0.1  # font outline: 0.1 px
        cn_default_style.italic = -1
        cn_default_style.bold = -1
        cn_default_style.marginl = 10
        cn_default_style.marginr = 10
        cn_default_style.marginv = 3
        self._subs.styles["CN"] = cn_default_style

        marker_colors = {
            "plain": "{\\c&HFFFFFF&}",
            "verbs": "{\\c&H7C94FF&}",
            "noun_phrases": "{\\c&H93F8E9&}",
        }

        self._subs.events = []
        for bullet in content:
            # One line per item; columns are separated by four hard spaces.
            _phrases = "\\N".join(
                "\\h\\h\\h\\h".join([
                    marker_colors["noun_phrases"] + w["original"],
                    marker_colors["plain"] + w["translated"]
                ]) for w in bullet["noun_phrases"]
            )
            _verbs = "\\N".join(
                "\\h\\h\\h\\h".join([
                    marker_colors["verbs"] + w["text"],
                    marker_colors["plain"] + "(" + w["lemma"] + ")",
                    marker_colors["plain"] + w["meaning"]
                ]) for w in bullet["verbs"]
            )

            start = pysubs2.time.timestamp_to_ms(
                pysubs2.time.TIMESTAMP.match(bullet["start"]).groups())
            end = pysubs2.time.timestamp_to_ms(
                pysubs2.time.TIMESTAMP.match(bullet["end"]).groups())

            # The scroll parameter is derived from the event duration, so a
            # zero-length event cannot be animated (it would divide by zero).
            overlay_fields = {}
            if animation and end != start:
                overlay_fields["effect"] = (
                    "Scroll up;10;110;" +
                    str(100000 / (0.90 * (end - start))))

            phrase_event = pysubs2.SSAEvent(start=start,
                                            end=end,
                                            text=_phrases,
                                            style="Phrase",
                                            **overlay_fields)
            verb_event = pysubs2.SSAEvent(start=start,
                                          end=end,
                                          text=_verbs,
                                          style="Verb",
                                          **overlay_fields)

            # Each marker ``w`` selects its colour through ``w[1]``.
            _markers = " ".join(
                marker_colors[w[1]] + _underline(w) for w in bullet["markers"]
            )
            event = pysubs2.SSAEvent(start=start,
                                     end=end,
                                     text=_markers,
                                     style="Default")
            self._subs.append(event)
            self._subs.append(phrase_event)
            self._subs.append(verb_event)

        if self._cn_subs:
            for e in self._cn_subs:
                e.style = "CN"
                self._subs.append(e)
Exemple #16
0
def merge_bilingual_assfile(
        # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        subtitles,
        order=1):
    """
    Merge the two largest style groups of a subtitles file into bilingual events.

    Events are grouped by style name. The two groups holding the most events
    are taken as the two languages: one becomes the "destination" (first line
    of a merged event), the other the "source" (second line, introduced by a
    ``\\N{\\r<source style>}`` override). When ``order`` is truthy and the
    largest group's style first appears later in the file than the
    second-largest group's style, the largest group is the destination;
    otherwise the second-largest group is.

    Both groups are sorted and walked in lockstep. Overlapping pairs yield one
    merged event; the non-overlapping head/tail of a partially overlapping
    event, and events with no partner at all, are kept as standalone events.

    Returns a new ``pysubs2.SSAFile`` that reuses ``subtitles.styles`` and
    ``subtitles.info``. Its events are laid out as: merged events, then
    standalone pieces, then the unconsumed rest of one language; the remaining
    style groups are placed before or after that according to where their
    style first appeared. The result is not re-sorted by time.

    Raises ``IndexError`` when ``subtitles`` has fewer than two styles in use.
    """

    def _slice_of(template, begin, finish):
        # Standalone copy of `template` restricted to [begin, finish].
        piece = pysubs2.SSAEvent()
        piece.start = begin
        piece.end = finish
        piece.is_comment = template.is_comment
        piece.text = template.text
        piece.style = template.style
        return piece

    # Bucket events per style and remember where each style first shows up.
    by_style = {}
    first_seen = {}
    for position, item in enumerate(subtitles.events):
        bucket = by_style.get(item.style)
        if bucket is None:
            by_style[item.style] = [item]
            first_seen[item.style] = position
        else:
            bucket.append(item)

    # Ascending by size, so the two pops take the largest, then second-largest.
    groups = sorted(by_style.values(), key=len)
    largest = groups.pop()
    runner_up = groups.pop()

    dst_ssafile = pysubs2.SSAFile()
    src_ssafile = pysubs2.SSAFile()

    largest_comes_later = \
        first_seen[largest[0].style] > first_seen[runner_up[0].style]
    if largest_comes_later and order:
        # destination language events are behind source language events
        dst_ssafile.events, src_ssafile.events = largest, runner_up
    else:
        dst_ssafile.events, src_ssafile.events = runner_up, largest

    dst_ssafile.sort()
    src_ssafile.sort()

    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    # default in dst-lf-src order
    dst_events = dst_ssafile.events
    src_events = src_ssafile.events
    dst_length = len(dst_events)
    src_length = len(src_events)
    i = 0
    j = 0

    start = 0
    end = 0

    # Events (or fragments of events) that end up without a partner.
    unpaired = []
    while i < dst_length and j < src_length:
        dst_cur = dst_events[i]
        src_cur = src_events[j]

        if dst_cur.is_comment != src_cur.is_comment:
            # A comment never pairs with a dialogue line; set the comment aside.
            if dst_cur.is_comment:
                unpaired.append(dst_cur)
                i += 1
            else:
                unpaired.append(src_cur)
                j += 1
            continue

        if dst_cur.start == src_cur.start or dst_cur.end == src_cur.end:
            # Aligned on one edge: the destination timing wins.
            start = dst_cur.start
            end = dst_cur.end
        elif dst_cur.start >= src_cur.end:
            # Source event lies entirely before the destination event.
            unpaired.append(src_cur)
            j += 1
            continue
        elif src_cur.start >= dst_cur.end:
            # Destination event lies entirely before the source event.
            unpaired.append(dst_cur)
            i += 1
            continue
        elif src_cur.start < dst_cur.start:
            # Source starts first: keep its leading part on its own.
            unpaired.append(_slice_of(src_cur, src_cur.start, dst_cur.start))
            start = dst_cur.start

            if src_cur.end > dst_cur.end:
                # ...and its trailing part as well when it also ends later.
                unpaired.append(_slice_of(src_cur, dst_cur.end, src_cur.end))
                end = dst_cur.end
            else:
                end = src_cur.end

        elif dst_cur.start < src_cur.start:
            # Mirror case: destination starts first.
            unpaired.append(_slice_of(dst_cur, dst_cur.start, src_cur.start))
            start = src_cur.start

            if dst_cur.end > src_cur.end:
                unpaired.append(_slice_of(dst_cur, src_cur.end, dst_cur.end))
                end = src_cur.end
            else:
                end = dst_cur.end

        # Emit the bilingual event covering the shared [start, end] span.
        merged = pysubs2.SSAEvent()
        merged.start = start
        merged.end = end
        merged.is_comment = dst_cur.is_comment
        merged.text = \
            dst_cur.text + \
            "\\N{{\\r{style_name}}}".format(style_name=src_cur.style) + \
            src_cur.text
        merged.style = dst_cur.style
        new_ssafile.events.append(merged)
        i += 1
        j += 1

    # Whichever language still has events left contributes its tail as-is.
    remainder = dst_events[i:] if i < dst_length else src_events[j:]
    new_ssafile.events = new_ssafile.events + unpaired + remainder

    # Re-attach the other style groups around the merged block.
    for other_group in groups:
        leading_style = new_ssafile.events[0].style
        if first_seen[other_group[0].style] > first_seen[leading_style]:
            new_ssafile.events = new_ssafile.events + other_group
        else:
            new_ssafile.events = other_group + new_ssafile.events

    return new_ssafile
Exemple #17
0
    def auto_get_vtt_words_index(
            self,
            events,
            stop_words_set_1,
            stop_words_set_2,
            text_limit=constants.DEFAULT_MAX_SIZE_PER_EVENT,
            avoid_split=False):
        """
        Assign self.vtt_words to the given events (external regions) and record
        the word-index boundary of every event in self.vtt_words_index.

        Words and events are walked in parallel: a word whose start lies before
        the current event's end belongs to that event. The first word of each
        event has its start snapped to the event's start (and its end repaired
        if that leaves it with a non-positive duration).

        While an event is being filled, the accumulated text length
        (len(word) + 1 per word) is tracked. Once it exceeds ``text_limit`` and
        ``avoid_split`` is false, a split position is searched for with
        ``stop_words_set_1`` and, failing that, ``stop_words_set_2``; on
        success the event is cut in two at that word.

        ``events`` is modified in place: end times are changed, events are
        inserted on a split and events that receive no words are deleted.
        ``self.vtt_words`` timestamps may be adjusted as well.

        Returns the event list truncated to the number of recorded boundaries
        when every word has been consumed; otherwise returns None and leaves
        self.vtt_words_index untouched.
        """
        # i indexes events, j indexes self.vtt_words.
        i = 0
        j = 0
        vtt_words_len = len(self.vtt_words)
        # Leading 0 is a sentinel so vtt_words_index[-1] is always the index of
        # the first word of the current event; it is stripped before returning.
        vtt_words_index = [0]
        # True once the first word of the current event has been handled.
        is_started = False
        # last_len = 0
        # Text length accumulated for the current event.
        text_len = 0
        events_len = len(events)
        while j < vtt_words_len and i < events_len:
            if self.vtt_words[j].start < events[i].end:
                # Word j starts before the current event ends: it belongs to it.
                if not is_started:
                    # start_delta = events[i].start - self.vtt_words[j].start
                    # if start_delta < 1000:
                    # inside the event
                    # start_delta < 0
                    # or a little ahead of time
                    # 0 <= start_delta < 300
                    # Snap the first word of the event to the event's start.
                    self.vtt_words[j].start = events[i].start
                    if self.vtt_words[j].end <= self.vtt_words[j].start:
                        # Snapping left the word with no duration; repair its end.
                        if j < vtt_words_len - 1:
                            if self.vtt_words[j].start < self.vtt_words[
                                    j + 1].start:
                                # Extend up to the start of the next word.
                                self.vtt_words[j].end = self.vtt_words[j +
                                                                       1].start
                            else:
                                # The next word does not start later either:
                                # take half of the span up to its end
                                # (integer halving via shift).
                                delta = \
                                    (self.vtt_words[j + 1].end - self.vtt_words[j].start) >> 1
                                self.vtt_words[
                                    j].end = delta + self.vtt_words[j].start
                                # NOTE(review): [j].end was just updated above,
                                # so this evaluates to start + 2 * delta, i.e.
                                # roughly the next word's own end, leaving it
                                # with almost no duration. Possibly [j].end
                                # alone was intended -- confirm.
                                self.vtt_words[
                                    j +
                                    1].start = delta + self.vtt_words[j].end
                        else:
                            # Last word overall: give it a fixed 200 duration
                            # (presumably milliseconds -- confirm).
                            self.vtt_words[
                                j].end = self.vtt_words[j].start + 200
                    is_started = True
                    # else:
                    #     # check if it's necessary to insert new events
                    #     if i < len(events) - 1:
                    #         events.insert(
                    #             i,
                    #             pysubs2.SSAEvent(start=self.vtt_words[j].start,
                    #                              end=events[i].start))
                    #     else:
                    #         events.insert(
                    #             i,
                    #             pysubs2.SSAEvent(start=self.vtt_words[j].start,
                    #                              end=self.vtt_words[j].start + 5000))
                    #     events[i].is_comment = True
                    #     # the end time is estimated so it needs a trim
                    #     continue
                # +1 per word, presumably for the separating space.
                text_len = text_len + len(self.vtt_words[j].word) + 1
                if text_len > text_limit and not avoid_split:
                    # The event's text is too long: look for a split position
                    # among the words collected so far (word j itself excluded).
                    vtt_word_dict = get_vtt_slice_pos_dict(
                        self.vtt_words[vtt_words_index[-1]:j])
                    # First attempt: only stop words from the first set.
                    stop_word_set = stop_words_set_1 & set(
                        vtt_word_dict.keys())
                    last_index = find_split_vtt_word(
                        total_length=text_len,
                        stop_word_set=stop_word_set,
                        vtt_word_dict=vtt_word_dict,
                        min_range_ratio=0.1)
                    if not last_index[1]:
                        # Nothing found: retry with the second stop word set.
                        stop_word_set = stop_words_set_2 & set(
                            vtt_word_dict.keys())
                        last_index = find_split_vtt_word(
                            total_length=text_len,
                            stop_word_set=stop_word_set,
                            vtt_word_dict=vtt_word_dict,
                            min_range_ratio=0.1)

                    # last_index[0] is used as a word offset inside the slice
                    # and last_index[1] as the text length before the split
                    # (inferred from usage here -- confirm against
                    # find_split_vtt_word).
                    if 0 < last_index[1] < text_limit:
                        vtt_words_index.append(vtt_words_index[-1] +
                                               last_index[0])
                        # Cut the event at the split word and insert a new
                        # event covering the rest of the original region.
                        last_end = events[i].end
                        events[i].end = self.vtt_words[
                            vtt_words_index[-1]].start
                        events.insert(
                            i + 1,
                            pysubs2.SSAEvent(start=events[i].end,
                                             end=last_end))
                        i = i + 1
                        events_len = events_len + 1
                        # Carry the length of the words after the split over
                        # to the newly inserted event.
                        text_len = text_len - last_index[1]
                j = j + 1
            else:
                # Word j starts at or after the current event's end.
                if text_len:
                    # The event received words: close it.
                    # if events[i].is_comment:
                    #     # trim the empty region
                    #     cur_speed = text_len * 1000 // events[i].duration
                    #     if last_len:
                    #         last_speed = last_len * 1000 // events[i - 1].duration
                    #     else:
                    #         last_speed = 10
                    #     if cur_speed < (last_speed >> 2):
                    #         events[i].duration = last_speed * events[i].duration // 1000
                    #     events[i].is_comment = False
                    # last_len = text_len
                    text_len = 0
                    if j - vtt_words_index[-1] > 1:
                        # Only when the event holds more than one word.
                        if self.vtt_words[j - 1].speed < 10:
                            # if the duration is too big
                            # it means the start time is not accurate
                            # Hand the last word over to the next event and end
                            # the word before it at the event's end.
                            j = j - 1
                            self.vtt_words[j - 1].end = events[i].end
                    vtt_words_index.append(j)
                    is_started = False
                    i = i + 1
                else:
                    # No word fell inside this event: drop it. events_len is
                    # not decremented here; the final slice below bounds the
                    # returned list.
                    del events[i]

        # Drop the sentinel 0.
        vtt_words_index = vtt_words_index[1:]
        if j == vtt_words_len:
            # Every word was consumed; the event that was still open when the
            # loop ended gets its closing boundary here.
            vtt_words_index.append(j)
            events = events[:len(vtt_words_index)]
            self.vtt_words_index = vtt_words_index
            return events
        # The events ran out before all the words were placed.
        return None
Exemple #18
0
    def man_get_vtt_words_index(self):
        """
        Build SSAEvent events and self.vtt_words_index from a manually edited
        text file.

        The text of self.vtt_words is written to a ".txt" file next to
        self.path and the user is asked to re-arrange it (one line per event).
        The file is then read back and compared with self.vtt_words word by
        word:

        - a word that differs only by case/punctuation (fuzzy partial ratio
          of 100) silently replaces the stored word;
        - any other mismatch is shown to the user, who either enters "1" to
          overwrite the stored word or presses Enter after fixing the file,
          in which case the file is re-read and the comparison resumes at
          the same word.

        For every line one event is created, starting at the start of the
        line's first word and ending at the end of its last word, and the
        index of the word following the line is appended to
        self.vtt_words_index. Text remaining in the file after the last word
        of self.vtt_words has been consumed is ignored.

        The temporary text file is deleted before returning.

        Returns the list of created events.
        """
        events = []
        path = self.path[:-3] + "txt"
        path = str_to_file(str_=self.to_text_str(), output=path, input_m=input)
        input(
            _("Wait for the events manual adjustment. "
              "Press Enter to continue."))
        # First line that still has to be (re)processed after a pause.
        line_count = 0
        # i indexes self.vtt_words, j indexes the words of the current line.
        i = 0
        j = 0
        vtt_len = len(self.vtt_words)
        # Map every punctuation character to a space for the fuzzy comparison.
        trans = str.maketrans(string.punctuation,
                              " " * len(string.punctuation))
        while True:
            # The context manager closes the file even if reading fails.
            with open(path, encoding=constants.DEFAULT_ENCODING) as file_p:
                line_list = file_p.readlines()
            line_list_len = len(line_list)
            # A pause is only valid for the pass it happened in. Without this
            # reset, a word the user corrected to an exact match left the flag
            # set and the method kept re-reading the file forever.
            is_paused = False
            k = line_count
            # Stop as soon as every word is consumed; indexing
            # self.vtt_words[vtt_len] would raise IndexError.
            while k < line_list_len and i < vtt_len:
                word_list = line_list[k].split()
                # When resuming mid-line after a pause, j is not 0; i - j is
                # the index of the line's first word (equal to i otherwise).
                event = pysubs2.SSAEvent(start=self.vtt_words[i - j].start)
                word_list_len = len(word_list)
                while j < word_list_len:
                    if self.vtt_words[i].word != word_list[j]:
                        normalized_vtt = self.vtt_words[i].word.lower(
                        ).translate(trans).replace(" ", "")
                        normalized_txt = word_list[j].lower().translate(
                            trans).replace(" ", "")
                        if fuzz.partial_ratio(normalized_vtt,
                                              normalized_txt) != 100:
                            # Real mismatch: show some context and ask.
                            if self.vtt_words_index:
                                start_delta = self.vtt_words_index[-1]
                            else:
                                start_delta = 0
                            # Up to five words past the current one.
                            end_delta = min(i + 6, vtt_len)
                            print(
                                _("\nLine {num}, word {num2}").format(
                                    num=len(events), num2=j))
                            cur_line = "".join(
                                " {word}".format(word=context_word.word)
                                for context_word in self.vtt_words[
                                    start_delta:end_delta])
                            print(cur_line)
                            print(" ".join(word_list))
                            print("{word} | {word2}".format(
                                word=self.vtt_words[i].word,
                                word2=word_list[j]))
                            result = input(
                                _("Press Enter to manual adjust. "
                                  "Input 1 to overwrite."))
                            if result != "1":
                                # The user edits the file: remember the line
                                # and re-read, keeping i and j where they are.
                                line_count = k
                                is_paused = True
                                break
                        # Fuzzy match or explicit overwrite: the text file wins.
                        self.vtt_words[i].word = word_list[j]

                    i = i + 1
                    j = j + 1
                    if i >= vtt_len:
                        # No words left to compare against.
                        break
                if is_paused:
                    break
                j = 0
                self.vtt_words_index.append(i)
                if i:
                    event.end = self.vtt_words[i - 1].end
                events.append(event)
                k = k + 1
            if not is_paused:
                break
        constants.DELETE_PATH(path)
        return events