예제 #1
0
파일: core.py 프로젝트: zkcpku/autosub
def list_to_ass_str(  # pylint: disable=too-many-arguments
        text_list,
        styles_list,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT):
    """
    Give an input timed text list, format it to an ass string.

    Args:
        text_list: Either a flat list ``[((start, end), text), ...]`` or a
            two-item list ``[src_list, dst_list]`` whose first item
            provides the regions.
        styles_list: Flat list alternating style names and style objects,
            ``[name, style, name, style, ...]``.
        subtitles_file_format: ``'ass'``, ``'ssa'`` or ``'ass.json'``. Any
            other value falls back to ``constants.DEFAULT_SUBTITLES_FORMAT``.

    Returns:
        A ``(formatted_subtitles, subtitles_file_format)`` tuple, where the
        second item is the format that was actually used to build the string.
    """

    if subtitles_file_format in ('ass', 'ssa', 'ass.json'):
        pysubs2_obj = pysubs2.SSAFile()
        pysubs2_obj.styles = \
            {styles_list[i]: styles_list[i + 1] for i in range(0, len(styles_list), 2)}
        if not isinstance(text_list[0], list):
            # text_list is [((start, end), text), ...]
            # text_list provides regions
            sub_utils.pysubs2_ssa_event_add(src_ssafile=None,
                                            dst_ssafile=pysubs2_obj,
                                            text_list=text_list,
                                            style_name=styles_list[0])
        else:
            # text_list is [[src_list], [dst_list]]
            # src_list provides regions
            sub_utils.pysubs2_ssa_event_add(src_ssafile=None,
                                            dst_ssafile=pysubs2_obj,
                                            text_list=text_list[0],
                                            style_name=styles_list[0])
            # The second style name sits at index 2. Fall back to the first
            # style when only one name/style pair was supplied instead of
            # raising IndexError.
            if len(styles_list) > 2:
                dst_style_name = styles_list[2]
            else:
                dst_style_name = styles_list[0]
            sub_utils.pysubs2_ssa_event_add(src_ssafile=None,
                                            dst_ssafile=pysubs2_obj,
                                            text_list=text_list[1],
                                            style_name=dst_style_name)

        if subtitles_file_format != 'ass.json':
            formatted_subtitles = pysubs2_obj.to_string(
                format_=subtitles_file_format)
        else:
            formatted_subtitles = pysubs2_obj.to_string(format_='json')
    else:
        # fallback process
        print(
            _("Format \"{fmt}\" not supported. "
              "Using \"{default_fmt}\" instead.").format(
                  fmt=subtitles_file_format,
                  default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
        pysubs2_obj = pysubs2.SSAFile()
        sub_utils.pysubs2_ssa_event_add(src_ssafile=None,
                                        dst_ssafile=pysubs2_obj,
                                        text_list=text_list,
                                        style_name=None)
        formatted_subtitles = pysubs2_obj.to_string(
            format_=constants.DEFAULT_SUBTITLES_FORMAT)
        # Report the format the string was really rendered in, not the
        # unsupported one that was requested.
        subtitles_file_format = constants.DEFAULT_SUBTITLES_FORMAT

    return formatted_subtitles, subtitles_file_format
예제 #2
0
def list_to_ass_str(
        text_list,
        styles_list,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT,
        same_event_type=0):
    """
    Give an input timed text list, format it to an ass string.

    Args:
        text_list: Either a flat list ``[((start, end), text), ...]`` or a
            two-item list ``[src_list, dst_list]`` whose first item
            provides the regions.
        styles_list: Flat list alternating style names and style objects,
            ``[name, style, name, style, ...]``.
        subtitles_file_format: Format name handed to ``to_string``;
            ``'ass.json'`` is rendered with pysubs2's ``'json'`` format.
        same_event_type: Forwarded unchanged to
            ``sub_utils.pysubs2_ssa_event_add`` for the second text list.

    Returns:
        The formatted subtitles string.
    """
    pysubs2_obj = pysubs2.SSAFile()
    pysubs2_obj.styles = \
        {styles_list[i]: styles_list[i + 1] for i in range(0, len(styles_list), 2)}
    if not isinstance(text_list[0], list):
        # text_list is [((start, end), text), ...]
        # text_list provides regions
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=pysubs2_obj,
            text_list=text_list,
            style_name=styles_list[0])
    else:
        # text_list is [[src_list], [dst_list]]
        # src_list provides regions
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=pysubs2_obj,
            text_list=text_list[0],
            style_name=styles_list[0])
        # The events built so far become the source for the second pass,
        # which writes into a brand-new SSAFile.
        src_obj = pysubs2_obj
        pysubs2_obj = pysubs2.SSAFile()
        # The second style name sits at index 2. Fall back to the first
        # style when only one name/style pair was supplied instead of
        # raising IndexError.
        if len(styles_list) > 2:
            dst_style_name = styles_list[2]
        else:
            dst_style_name = styles_list[0]
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=src_obj,
            dst_ssafile=pysubs2_obj,
            text_list=text_list[1],
            style_name=dst_style_name,
            same_event_type=same_event_type)

    if subtitles_file_format != 'ass.json':
        formatted_subtitles = pysubs2_obj.to_string(format_=subtitles_file_format)
    else:
        formatted_subtitles = pysubs2_obj.to_string(format_='json')

    return formatted_subtitles
예제 #3
0
    def write_file(self, fname: str) -> None:
        """
        Serialize the resolved subtitles and write the encoded bytes out.

        The output format is ``self._sub_format`` when ``fname`` is None,
        otherwise it is the extension of ``fname`` without the dot. A falsy
        ``fname`` sends the bytes to stdout instead of a file.

        Raises:
            NotImplementedError: when no branch handles the combination of
                source format and output format.
        """
        # TODO: converter to go between self.subs_format and out_format
        out_format = (self._sub_format if fname is None
                      else os.path.splitext(fname)[-1][1:])
        resolved = list(self.gen_raw_resolved_subs())

        if self._sub_format in ("ssa", "ass"):
            ssa_file = pysubs2.SSAFile()
            ssa_file.events = resolved
            # Optional metadata is only copied over when it is present.
            if self._styles is not None:
                ssa_file.styles = self._styles
            if self._info is not None:
                ssa_file.info = self._info
            if self._fonts_opaque is not None:
                ssa_file.fonts_opaque = self._fonts_opaque
            payload = ssa_file.to_string(out_format)
        elif self._sub_format == "srt" and out_format in ("ssa", "ass"):
            # Round-trip through SRT text so pysubs2 can emit SSA/ASS.
            srt_text = srt.compose(resolved)
            payload = pysubs2.SSAFile.from_string(srt_text).to_string(
                out_format)
        elif out_format == "srt":
            payload = srt.compose(resolved)
        else:
            raise NotImplementedError("unsupported output format: %s" %
                                      out_format)

        payload = payload.encode(self._encoding)
        if six.PY3:
            target = fname or sys.stdout.fileno()
            with open(target, "wb") as sink:
                sink.write(payload)
        else:
            with (fname and open(fname, "wb")) or sys.stdout as sink:
                sink.write(payload)
예제 #4
0
    def write_file(self, fname):
        """
        Serialize the resolved subtitles and write the encoded bytes out.

        The output format is ``self._sub_format`` when ``fname`` is None,
        otherwise it is the extension of ``fname`` without the dot. A falsy
        ``fname`` sends the bytes to stdout instead of a file.

        Raises:
            NotImplementedError: when no branch handles the combination of
                source format and output format.
        """
        # TODO: converter to go between self.subs_format and out_format
        out_format = (self._sub_format if fname is None
                      else os.path.splitext(fname)[-1][1:])
        resolved = list(self.gen_raw_resolved_subs())

        if self._sub_format in ('ssa', 'ass'):
            ssa_file = pysubs2.SSAFile()
            ssa_file.events = resolved
            ssa_file.styles = self.styles
            # Script info is optional; keep pysubs2's default when absent.
            if self.info is not None:
                ssa_file.info = self.info
            payload = ssa_file.to_string(out_format)
        elif self._sub_format == 'srt' and out_format in ('ssa', 'ass'):
            # Round-trip through SRT text so pysubs2 can emit SSA/ASS.
            srt_text = srt.compose(resolved)
            payload = pysubs2.SSAFile.from_string(srt_text).to_string(
                out_format)
        elif out_format == 'srt':
            payload = srt.compose(resolved)
        else:
            raise NotImplementedError('unsupported output format: %s' %
                                      out_format)

        payload = payload.encode(self.encoding)
        if six.PY3:
            target = fname or sys.stdout.fileno()
            with open(target, 'wb') as sink:
                sink.write(payload)
        else:
            with (fname and open(fname, 'wb')) or sys.stdout as sink:
                sink.write(payload)
예제 #5
0
def convert_yt_comments(jsonname, comment_duration, video_info, outputname):
    """
    Convert a JSON list of timed comments into scrolling ASS subtitles.

    Args:
        jsonname: Path of a JSON file holding a list of comment dicts; each
            one is read through its "time_in_seconds" and "message" keys.
        comment_duration: Milliseconds a comment takes to cross the screen;
            also used as the event length.
        video_info: Mapping with a "duration" entry in seconds. Comments
            timed after it are dropped.
        outputname: Path handed to ``SSAFile.save``.

    Returns:
        None. Nothing is written when the JSON list is empty.
    """
    with open(jsonname, encoding="utf-8") as f:
        yt_comments = json.load(f)

    if not yt_comments:
        return

    subs = pysubs2.SSAFile()
    subs.info["PlayResX"] = 384
    subs.info["PlayResY"] = 288

    # Every event is shifted back later so the first comment starts near 0.
    start_time_shift = yt_comments[0]["time_in_seconds"] * 1000

    # One slot per horizontal lane; a slot remembers the last comment placed
    # in that lane (None while the lane is still unused).
    comment_size = 20
    lane_count = len(range(0, subs.info["PlayResY"], comment_size))
    comment_channel = [None] * lane_count

    video_end_ms = video_info["duration"] * 1000

    for msg in yt_comments:
        now = msg["time_in_seconds"] * 1000
        if now > video_end_ms:
            continue

        if not msg["message"]:
            continue

        # Take the first lane that is unused or whose last comment is old
        # enough; lane 1 is the fallback when every lane is busy.
        # NOTE(review): the busy window is scaled by the length of the NEW
        # message, not of the one occupying the lane - confirm intent.
        selected_channel = 1
        for index, chan in enumerate(comment_channel):
            if (not chan or chan["time_in_seconds"] * 1000 +
                    (200 * len(msg["message"])) < now):
                comment_channel[index] = msg
                selected_channel = index + 1
                break

        # ASS \move override tag: slide from x=414 to x=-30 on the lane's y.
        # The backslash is escaped explicitly ("\m" is an invalid escape).
        lane_y = selected_channel * comment_size
        movement = "{{\\move(414,{y},-30,{y},0,{duration})}}".format(
            y=lane_y, duration=comment_duration)

        subs.append(
            pysubs2.SSAEvent(
                start=pysubs2.make_time(ms=now),
                end=pysubs2.make_time(ms=now + comment_duration),
                text=movement + msg["message"]))

    subs.shift(ms=-start_time_shift + 100)
    subs.save(outputname)
예제 #6
0
    def write_file(self, fname):
        """
        Serialize the resolved subtitles in ``self.sub_format`` and write the
        encoded bytes to ``fname``, or to stdout when ``fname`` is falsy.

        Raises:
            NotImplementedError: when ``self.sub_format`` is not one of
                'srt', 'ssa' or 'ass'.
        """
        resolved = list(self.gen_raw_resolved_subs())

        if self.sub_format == 'srt':
            payload = srt.compose(resolved)
        elif self.sub_format in ('ssa', 'ass'):
            ssa_file = pysubs2.SSAFile()
            ssa_file.events = resolved
            payload = ssa_file.to_string(self.sub_format)
        else:
            raise NotImplementedError('unsupported format: %s' %
                                      self.sub_format)

        payload = payload.encode(self.encoding)
        if six.PY3:
            target = fname or sys.stdout.fileno()
            with open(target, 'wb') as sink:
                sink.write(payload)
        else:
            with (fname and open(fname, 'wb')) or sys.stdout as sink:
                sink.write(payload)
예제 #7
0
def list_to_vtt_str(subtitles):
    """
    Serialize a list of subtitles according to the VTT format.

    The subtitles are first rendered as SRT through pysubs2; the decimal
    comma of every timing line is then turned into a dot and the "WEBVTT"
    header is prepended.

    Args:
        subtitles: Timed text list handed to ``pysubs2_ssa_event_add``.

    Returns:
        The VTT-formatted string.
    """
    import re  # function-scope import keeps this block self-contained

    pysubs2_obj = pysubs2.SSAFile()
    pysubs2_ssa_event_add(src_ssafile=None,
                          dst_ssafile=pysubs2_obj,
                          text_list=subtitles)
    srt_string = pysubs2_obj.to_string(format_='srt')

    # Recognize timing lines by their shape rather than by "every fourth
    # line": a cue whose text spans several lines would otherwise shift the
    # count and leave later timestamps unconverted (and rewrite commas in
    # the cue text instead).
    timing_line = re.compile(
        r'^\d+:\d{2}:\d{2},\d{3} --> \d+:\d{2}:\d{2},\d{3}\s*$')
    new_lines = [
        line.replace(',', '.') if timing_line.match(line) else line
        for line in srt_string.split('\n')
    ]
    return 'WEBVTT\n\n' + '\n'.join(new_lines)
예제 #8
0
def make_ass(wav, segments, transcriptions, utt2spk, ass):
    """
    Build an .ASS file from transcriptions.

    Arguments:
       wav: name of the audio file
       segments: path to the segments description file
       transcriptions: path to the transcription file
       utt2spk: path to the file mapping segments to speakers
       ass: path to the .ASS subtitles file
    """
    sub = pysubs2.SSAFile()
    sub.info['Title'] = 'Default Aegisub file'
    sub.info['YCbCr Matrix'] = 'None'
    sub.aegisub_project['Audio File'] = wav
    sub.aegisub_project['Scroll Position'] = 0
    sub.aegisub_project['Active Line'] = 0

    # Headerless tables, all keyed by the utterance id.
    segments_df = pd.read_csv(
        segments, header=None, sep=' ',
        names=['utt_id', 'wav', 'start', 'end'])
    transcriptions_df = pd.read_csv(
        transcriptions, sep='\t', header=None,
        names=['utt_id', 'text'])
    utt2spk_df = pd.read_csv(
        utt2spk, sep='\t', header=None,
        names=['utt_id', 'speaker'])

    # Left joins keep every segment; missing text/speaker become ''.
    with_text = segments_df.merge(transcriptions_df, how='left', on='utt_id')
    with_speaker = with_text.merge(utt2spk_df, how='left', on='utt_id')
    events = with_speaker.fillna('')

    # Column order after the joins: utt_id, wav, start, end, text, speaker.
    sub.events.extend(
        pysubs2.SSAEvent(start=pysubs2.make_time(s=float(seg_start)),
                         end=pysubs2.make_time(s=float(seg_end)),
                         text=seg_text,
                         name=seg_speaker)
        for _utt_id, _seg_wav, seg_start, seg_end, seg_text, seg_speaker
        in events.values)
    sub.sort()
    sub.save(ass, format_='ass')
예제 #9
0
def list_to_sub_str(
        timed_text,
        fps=30.0,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT):
    """
    Serialize a timed text list into a subtitles string.

    'vtt', 'json' and 'txt' are delegated to the matching ``sub_utils``
    serializer; every other format is rendered by pysubs2. An unknown format
    prints a notice and falls back to
    ``constants.DEFAULT_SUBTITLES_FORMAT``.
    """

    def render_with_pysubs2(**to_string_options):
        # Build a fresh SSAFile from timed_text and let pysubs2 serialize it.
        ssa_file = pysubs2.SSAFile()
        sub_utils.pysubs2_ssa_event_add(
            src_ssafile=None,
            dst_ssafile=ssa_file,
            text_list=timed_text)
        return ssa_file.to_string(**to_string_options)

    if subtitles_file_format in ('srt', 'tmp', 'ass', 'ssa'):
        return render_with_pysubs2(format_=subtitles_file_format)

    if subtitles_file_format == 'vtt':
        return sub_utils.list_to_vtt_str(subtitles=timed_text)

    if subtitles_file_format == 'json':
        return sub_utils.list_to_json_str(subtitles=timed_text)

    if subtitles_file_format == 'ass.json':
        return render_with_pysubs2(format_='json')

    if subtitles_file_format == 'txt':
        return sub_utils.list_to_txt_str(subtitles=timed_text)

    if subtitles_file_format == 'sub':
        # sub format need fps
        # ref https://pysubs2.readthedocs.io/en/latest
        # /api-reference.html#supported-input-output-formats
        return render_with_pysubs2(format_='microdvd', fps=fps)

    if subtitles_file_format == 'mpl2.txt':
        return render_with_pysubs2(format_='mpl2', fps=fps)

    # fallback process
    print(_("Format \"{fmt}\" not supported. "
            "Use \"{default_fmt}\" instead.").format(
                fmt=subtitles_file_format,
                default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
    return render_with_pysubs2(format_=constants.DEFAULT_SUBTITLES_FORMAT)
예제 #10
0
def subs_trans(  # pylint: disable=too-many-branches, too-many-statements, too-many-locals
        args,
        input_m=input,
        fps=30.0,
        styles_list=None):
    """
    Give args and translate a subtitles file.

    Loads ``args.input`` with pysubs2, translates the text of every event
    and, for each of "bilingual" and "dst" present in ``args.output_files``,
    renders a subtitles string and writes it through ``core.str_to_file``.
    Handled entries are removed from ``args.output_files``.

    Args:
        args: Parsed options. Attributes read here: ``output_files``,
            ``input``, ``styles``, ``format``, ``gtransv2``,
            ``trans_concurrency``, ``src_language``, ``dst_language``,
            ``lines_per_trans``, ``sleep_seconds``, ``user_agent``,
            ``service_urls`` and ``output``.
        input_m: Input function forwarded to ``core.str_to_file``.
        fps: Frame rate forwarded to ``SSAFile.to_string``.
        styles_list: Flat ``[name, style, name, style, ...]`` list. Only used
            when ``args.styles`` is set and the format is ass/ssa/ass.json;
            otherwise it is replaced by the style name of the first event.

    Raises:
        exceptions.AutosubException: when no output file is requested, when
            the translation is empty or its length does not match the input,
            and (as a completion signal) when nothing is left to produce
            after the bilingual file.
    """
    if not args.output_files:
        raise exceptions.AutosubException(
            _("\nNo works done."
              " Check your \"-of\"/\"--output-files\" option."))

    src_sub = pysubs2.SSAFile.load(args.input)

    if args.styles and args.format in ('ass', 'ssa', 'ass.json'):
        src_sub.styles = \
            {styles_list[i]: styles_list[i + 1] for i in range(0, len(styles_list), 2)}
        for event in src_sub.events:
            event.style = styles_list[0]
    else:
        styles_list = [
            src_sub.events[0].style,
        ]
    text_list = [event.text for event in src_sub.events]

    # text translation
    if args.gtransv2:
        # use gtransv2
        translated_text = core.list_to_gtv2(
            text_list=text_list,
            api_key=args.gtransv2,
            concurrency=args.trans_concurrency,
            src_language=args.src_language,
            dst_language=args.dst_language,
            lines_per_trans=args.lines_per_trans)
    else:
        # use googletrans
        translated_text = core.list_to_googletrans(
            text_list,
            src_language=args.src_language,
            dst_language=args.dst_language,
            sleep_seconds=args.sleep_seconds,
            user_agent=args.user_agent,
            service_urls=args.service_urls)

    if not translated_text or len(translated_text) != len(text_list):
        raise exceptions.AutosubException(_("Error: Translation failed."))

    # The second style name sits at index 2 of the flat styles_list. The
    # former ``len(styles_list) == 2`` test could never index it without an
    # IndexError; the first style is used when no second one exists.
    if len(styles_list) > 2:
        translated_style = styles_list[2]
    else:
        translated_style = styles_list[0]

    def render(ssafile):
        # 'ass.json' is pysubs2's 'json' format; the others need fps passed.
        if args.format != 'ass.json':
            return ssafile.to_string(format_=args.format, fps=fps)
        return ssafile.to_string(format_='json')

    # 'mpl2' output is written with the 'mpl2.txt' extension.
    extension = 'mpl2.txt' if args.format == 'mpl2' else args.format

    # Only the ``remove`` call is guarded: a KeyError there means the output
    # was not requested. Errors raised while rendering or writing are no
    # longer swallowed.
    try:
        args.output_files.remove("bilingual")
    except KeyError:
        pass
    else:
        bilingual_sub = pysubs2.SSAFile()
        bilingual_sub.styles = src_sub.styles
        bilingual_sub.events = src_sub.events[:]
        sub_utils.pysubs2_ssa_event_add(src_ssafile=bilingual_sub,
                                        dst_ssafile=bilingual_sub,
                                        text_list=translated_text,
                                        style_name=translated_style)

        bilingual_name = "{base}.{nt}.{extension}".format(
            base=args.output,
            nt=args.src_language + '&' + args.dst_language,
            extension=extension)

        subtitles_file_path = core.str_to_file(str_=render(bilingual_sub),
                                               output=bilingual_name,
                                               input_m=input_m)
        # subtitles string to file
        print(
            _("Bilingual subtitles file "
              "created at \"{}\".").format(subtitles_file_path))

        if not args.output_files:
            raise exceptions.AutosubException(_("\nAll works done."))

    try:
        args.output_files.remove("dst")
    except KeyError:
        pass
    else:
        dst_sub = pysubs2.SSAFile()
        dst_sub.styles = src_sub.styles
        sub_utils.pysubs2_ssa_event_add(src_ssafile=src_sub,
                                        dst_ssafile=dst_sub,
                                        text_list=translated_text,
                                        style_name=translated_style)

        dst_name = "{base}.{nt}.{extension}".format(base=args.output,
                                                    nt=args.dst_language,
                                                    extension=extension)
        subtitles_file_path = core.str_to_file(str_=render(dst_sub),
                                               output=dst_name,
                                               input_m=input_m)
        # subtitles string to file
        print(
            _("Destination language subtitles "
              "file created at \"{}\".").format(subtitles_file_path))
예제 #11
0
import xml.etree.ElementTree as ET
import pysubs2
from auto_sub import find_type_file

# Locate a .ttml file through the project helper and parse it as XML.
ttmlname = find_type_file('.ttml')
tree = ET.parse(ttmlname)
root = tree.getroot()
# Positional lookup into the document tree.
# NOTE(review): presumably root[0][0] is the styling section and root[1][0]
# the caption container - confirm against a sample TTML file.
styles = root[0][0]
captions = root[1][0]
sublist = []
styledict = dict()

# SSA file that receives the converted output.
towritesubs = pysubs2.SSAFile()
for styling in styles:
    # Attributes are addressed by their fully qualified (namespaced) names.
    color = styling.get(u'{http://www.w3.org/ns/ttml#styling}color')
    stylename = styling.get(u'{http://www.w3.org/XML/1998/namespace}id')
    if color:
        # Named colours are mapped to fixed r/g/b/a values.
        if color == "white":
            r = 255
            g = 255
            b = 255
            a = 0
        elif color == "black":
            r = 0
            g = 0
            b = 0
            a = 0
        else:
            # Otherwise characters 1-2 of the colour string are read as a
            # hexadecimal red component (e.g. the "rr" of "#rrggbb").
            r = int(
                styling.get(u'{http://www.w3.org/ns/ttml#styling}color')[1:3],
                16)
예제 #12
0
def merge_bilingual_assfile(
        # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        subtitles,
        order=1):
    """
    Merge bilingual subtitles file's events automatically.

    Events are grouped by style name. The two largest groups are treated as
    the two languages and merged pairwise into single events whose text is
    ``<dst text>\\N{\\r<src style>}<src text>``.

    Args:
        subtitles: Object exposing ``events``, ``styles`` and ``info``
            (a pysubs2 SSAFile in practice).
        order: When truthy, the larger group is the destination language if
            its style first appears after the other group's style;
            otherwise the second-largest group is always the destination.

    Returns:
        A new ``pysubs2.SSAFile`` sharing the input's styles and info.

    Raises:
        IndexError: if the events use fewer than two distinct styles
            (the second ``pop()`` below has nothing left to return).
    """
    style_events = {}
    event_pos = {}

    # Group events by style and remember where each style first appears.
    i = 0
    for event in subtitles.events:
        if event.style not in style_events:
            style_events[event.style] = [event]
            event_pos[event.style] = i
        else:
            style_events[event.style].append(event)
        i = i + 1

    # Ascending by group size, so pop() yields the largest group first.
    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()
    events_2 = sorted_events_list.pop()

    dst_ssafile = pysubs2.SSAFile()
    src_ssafile = pysubs2.SSAFile()

    if event_pos[events_1[0].style] > event_pos[events_2[0].style] and order:
        # destination language events are behind source language events in a bilingual subtitles
        dst_ssafile.events = events_1
        src_ssafile.events = events_2
    else:
        dst_ssafile.events = events_2
        src_ssafile.events = events_1

    dst_ssafile.sort()
    src_ssafile.sort()

    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    # default in dst-lf-src order
    dst_length = len(dst_ssafile.events)
    src_length = len(src_ssafile.events)
    # i walks the destination events, j the source events.
    i = 0
    j = 0

    # Time span of the merged event built at the bottom of each iteration.
    start = 0
    end = 0

    # Events (or fragments of events) that could not be paired.
    events_0 = []
    while i < dst_length and j < src_length:
        # A comment cannot be paired with a non-comment: set the comment
        # aside when it is the destination event, otherwise set the source
        # event aside, and advance only that side.
        if dst_ssafile.events[i].is_comment != src_ssafile.events[j].is_comment:
            if dst_ssafile.events[i].is_comment:
                events_0.append(dst_ssafile.events[i])
                i = i + 1
                continue
            events_0.append(src_ssafile.events[j])
            j = j + 1
            continue
        if dst_ssafile.events[i].start == src_ssafile.events[j].start or \
                dst_ssafile.events[i].end == src_ssafile.events[j].end:
            # Same start or same end: pair them using the destination timing.
            start = dst_ssafile.events[i].start
            end = dst_ssafile.events[i].end
        elif dst_ssafile.events[i].start >= src_ssafile.events[j].end:
            # Source event ends before the destination event begins.
            events_0.append(src_ssafile.events[j])
            j = j + 1
            continue
        elif src_ssafile.events[j].start >= dst_ssafile.events[i].end:
            # Destination event ends before the source event begins.
            events_0.append(dst_ssafile.events[i])
            i = i + 1
            continue
        elif src_ssafile.events[j].start < dst_ssafile.events[i].start:
            # Partial overlap, source starts first: the leading source-only
            # part becomes its own event.
            event = pysubs2.SSAEvent()
            event.start = src_ssafile.events[j].start
            event.end = dst_ssafile.events[i].start
            event.is_comment = src_ssafile.events[j].is_comment
            event.text = src_ssafile.events[j].text
            event.style = src_ssafile.events[j].style
            events_0.append(event)
            start = dst_ssafile.events[i].start

            if src_ssafile.events[j].end > dst_ssafile.events[i].end:
                # Source also outlasts the destination: keep the trailing
                # source-only part as another event.
                event = pysubs2.SSAEvent()
                event.start = dst_ssafile.events[i].end
                event.end = src_ssafile.events[j].end
                event.is_comment = src_ssafile.events[j].is_comment
                event.text = src_ssafile.events[j].text
                event.style = src_ssafile.events[j].style
                events_0.append(event)
                end = dst_ssafile.events[i].end
            else:
                end = src_ssafile.events[j].end

        elif dst_ssafile.events[i].start < src_ssafile.events[j].start:
            # Partial overlap, destination starts first: mirror image of the
            # branch above.
            event = pysubs2.SSAEvent()
            event.start = dst_ssafile.events[i].start
            event.end = src_ssafile.events[j].start
            event.is_comment = dst_ssafile.events[i].is_comment
            event.text = dst_ssafile.events[i].text
            event.style = dst_ssafile.events[i].style
            events_0.append(event)
            start = src_ssafile.events[j].start

            if dst_ssafile.events[i].end > src_ssafile.events[j].end:
                event = pysubs2.SSAEvent()
                event.start = src_ssafile.events[j].end
                event.end = dst_ssafile.events[i].end
                event.is_comment = dst_ssafile.events[i].is_comment
                event.text = dst_ssafile.events[i].text
                event.style = dst_ssafile.events[i].style
                events_0.append(event)
                end = src_ssafile.events[j].end
            else:
                end = dst_ssafile.events[i].end

        # Build the merged event: destination text, a line break, a style
        # reset to the source event's style, then the source text.
        event = pysubs2.SSAEvent()
        event.start = start
        event.end = end
        event.is_comment = dst_ssafile.events[i].is_comment
        event.text = \
            dst_ssafile.events[i].text + \
            "\\N{{\\r{style_name}}}".format(
                style_name=src_ssafile.events[j].style) + \
            src_ssafile.events[j].text
        event.style = dst_ssafile.events[i].style
        new_ssafile.events.append(event)
        i = i + 1
        j = j + 1

    # Output order: merged events, then the unpaired ones, then whatever is
    # left of the list that was not exhausted.
    if i < dst_length:
        new_ssafile.events = new_ssafile.events + events_0 + dst_ssafile.events[
            i:]
    else:
        new_ssafile.events = new_ssafile.events + events_0 + src_ssafile.events[
            j:]

    # Re-attach the remaining style groups after or before the current
    # events, depending on where their style first appeared relative to the
    # style of the current first event.
    for events in sorted_events_list:
        if event_pos[events[0].style] > event_pos[new_ssafile.events[0].style]:
            new_ssafile.events = new_ssafile.events + events
        else:
            new_ssafile.events = events + new_ssafile.events

    return new_ssafile
예제 #13
0
def split_dst_lf_src_assfile(  # pylint: disable=too-many-locals, too-many-branches
        subtitles,
        order=1,
        style_name=None):
    """
    Split bilingual subtitles file's events automatically.

    The largest group of same-style events is taken as the bilingual one.
    Each of its events whose text holds exactly one ``\\N`` is split into a
    first-line event and a second-line event; other events of that group
    are copied unchanged.

    Args:
        subtitles: Object exposing ``events``, ``styles`` and ``info``
            (a pysubs2 SSAFile in practice).
        order: When truthy the first-line events come before the
            second-line events in the output, otherwise after them.
        style_name: Sequence of one or two style names for the first and
            second lines. A single name is used for both; ``None`` or an
            empty sequence falls back to the style of the bilingual events.

    Returns:
        A new ``pysubs2.SSAFile`` sharing the input's styles and info. It
        has no events when ``subtitles`` has none.
    """
    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info

    # Group events by style, keeping first-appearance order of the styles.
    style_events = {}
    for event in subtitles.events:
        style_events.setdefault(event.style, []).append(event)

    if not style_events:
        # Nothing to split; avoid popping from an empty list below.
        return new_ssafile

    # Ascending by group size, so pop() yields the largest group.
    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()

    new_events_1 = []
    new_events_2 = []

    # Test for "missing" first: the default None has no len().
    if not style_name:
        style_name = [events_1[0].style, events_1[0].style]
    elif len(style_name) == 1:
        style_name = [style_name[0], style_name[0]]

    # Captures the content of a "{\r...}" override block.
    style_reset = re.compile(r"{\\r(.*?)}")

    for event in events_1:
        new_text_list = event.text.split(r'\N')
        new_events_1.append(copy.deepcopy(event))
        if len(new_text_list) == 2:
            new_events_1[-1].text = new_text_list[0]
            styles = style_reset.findall(new_text_list[1])
            new_events_1[-1].style = style_name[0]
            new_events_2.append(copy.deepcopy(event))
            if styles:
                # The first backslash-separated piece is the style name;
                # further pieces are extra override tags in the same block.
                # NOTE(review): the slicing below assumes the "{\r...}"
                # block opens the second line - confirm against real files.
                styles = styles[0].split("\\")
                if len(styles) > 1:
                    # Drop "{\r<name>\" and reopen the block for the
                    # remaining tags.
                    new_events_2[-1].text = "{\\" + new_text_list[1][
                        4 + len(styles[0]):]
                else:
                    # Drop the whole "{\r<name>}" block.
                    new_events_2[-1].text = new_text_list[1][4 +
                                                             len(styles[0]):]
                new_events_2[-1].style = styles[0]
            else:
                new_events_2[-1].text = new_text_list[1]
                new_events_2[-1].style = style_name[1]

    if order:
        new_events = new_events_1 + new_events_2
    else:
        new_events = new_events_2 + new_events_1

    sorted_events_list.append(new_events)

    for events in sorted_events_list:
        new_ssafile.events = new_ssafile.events + events

    return new_ssafile
예제 #14
0
def merge_src_assfile(  # pylint: disable=too-many-locals, too-many-nested-blocks,
        # pylint: disable=too-many-statements, too-many-branches, too-many-arguments
        # pylint: disable=too-many-boolean-expressions
        subtitles,
        stop_words_set_1,
        stop_words_set_2,
        max_join_size=constants.DEFAULT_MAX_SIZE_PER_EVENT,
        max_delta_time=int(constants.DEFAULT_CONTINUOUS_SILENCE * 1000),
        delimiters=constants.DEFAULT_EVENT_DELIMITERS,
        avoid_split=False):
    """
    Merge a source subtitles file's events automatically.

    Events are grouped by style name. Only the group holding the most events
    is merged/split; the events of every other style are placed unprocessed
    in front of the result.

    Within the processed group (sorted with ``SSAFile.sort()``), an event is
    considered for merging into the previous one when neither is a comment,
    both share a style, the gap between them is below ``max_delta_time``,
    and neither side of the junction is a character from ``delimiters``.
    If the combined text is shorter than ``max_join_size`` the two events are
    merged; otherwise, unless ``avoid_split`` is set, the text is re-split at
    a position chosen by ``find_split_index``.

    Side effects:
        * Every event of ``subtitles`` has ``\\N`` in its text replaced by a
          space, in place.
        * The returned file holds the same event objects as ``subtitles``,
          so merging rewrites the ``end`` and ``text`` of input events.
        * Three summary lines are printed (merge count, split count and the
          change in the number of events).

    Args:
        subtitles: Object exposing ``events``, ``styles`` and ``info``
            (presumably a ``pysubs2.SSAFile`` -- confirm against callers).
        stop_words_set_1: Set of words tried first as split positions when
            no usable delimiter-based position is found.
        stop_words_set_2: Set of words tried when ``stop_words_set_1`` gives
            no split position either.
        max_join_size: Exclusive upper bound on the text length of a merged
            event; also used as the bound for pieces produced by a split.
        max_delta_time: Exclusive upper bound on ``next.start - prev.end``
            for two events to be merged (same unit as the event times).
        delimiters: Container of characters; an event ending with one, or
            followed by an event starting with one, is not merged. It is
            also forwarded to ``get_slice_pos_dict``.
        avoid_split: When true, pairs that are too long to merge are left
            as they are instead of being re-split.

    Returns:
        A new ``pysubs2.SSAFile`` sharing ``styles`` and ``info`` with
        ``subtitles``. When ``subtitles`` has no events, it is returned
        empty and nothing is printed.
    """
    new_ssafile = pysubs2.SSAFile()
    new_ssafile.styles = subtitles.styles
    new_ssafile.info = subtitles.info
    style_events = {}

    if not subtitles.events:
        # Nothing to group or merge: hand back the empty copy instead of
        # failing on the pop() / events[0] accesses below.
        return new_ssafile

    for event in subtitles.events:
        event.text = event.text.replace("\\N", " ")
        if event.style not in style_events:
            style_events[event.style] = [event]
        else:
            style_events[event.style].append(event)

    # Ascending by size, so pop() yields the style with the most events.
    sorted_events_list = sorted(style_events.values(), key=len)
    events_1 = sorted_events_list.pop()

    temp_ssafile = pysubs2.SSAFile()
    temp_ssafile.events = events_1
    temp_ssafile.sort()

    sub_length = len(temp_ssafile.events)
    event_count = 1
    merge_count = 0
    split_count = 0

    new_ssafile.events.append(temp_ssafile.events[0])

    while event_count < sub_length:
        # Boundary texts used for the delimiter tests. An event whose text is
        # empty or made only of spaces has no boundary character to inspect;
        # such an event is never merged or split, it is appended unchanged.
        last_text = new_ssafile.events[-1].text.rstrip(" ")
        next_text = temp_ssafile.events[event_count].text.lstrip(" ")
        if last_text and next_text \
                and not new_ssafile.events[-1].is_comment \
                and not temp_ssafile.events[event_count].is_comment \
                and new_ssafile.events[-1].style == temp_ssafile.events[event_count].style \
                and temp_ssafile.events[event_count].start \
                - new_ssafile.events[-1].end < max_delta_time \
                and last_text[-1] not in delimiters \
                and next_text[0] not in delimiters:
            if len(new_ssafile.events[-1].text) + \
                    len(temp_ssafile.events[event_count].text) < max_join_size:
                # Short enough: fold the next event into the previous one.
                new_ssafile.events[-1].end = temp_ssafile.events[
                    event_count].end
                if new_ssafile.events[-1].text[-1] != " ":
                    new_ssafile.events[-1].text = new_ssafile.events[-1].text + " " + \
                                                  temp_ssafile.events[event_count].text
                else:
                    new_ssafile.events[-1].text = \
                        new_ssafile.events[-1].text + temp_ssafile.events[event_count].text
                merge_count = merge_count + 1
                event_count = event_count + 1
                continue

            if not avoid_split:
                # When the previous event is clearly the longer one (over 1.4
                # times the next) and already above 80% of max_join_size, it
                # is split on its own; otherwise the pair is joined first and
                # the joined event is split.
                if len(new_ssafile.events[-1].text) \
                        > len(temp_ssafile.events[event_count].text) * 1.4 and \
                        len(new_ssafile.events[-1].text) > max_join_size * 0.8:
                    joint_event = new_ssafile.events[-1]
                else:
                    joint_event = join_event(new_ssafile.events[-1],
                                             temp_ssafile.events[event_count])
                event_list = []
                while True:
                    word_dict = get_slice_pos_dict(joint_event.text,
                                                   delimiters=delimiters)
                    total_length = len(joint_event.text)
                    # use punctuations to split the sentence first
                    stop_word_set = set(word_dict.keys())
                    last_index = find_split_index(total_length=total_length,
                                                  stop_word_set=stop_word_set,
                                                  word_dict=word_dict,
                                                  min_range_ratio=0.1)

                    if len(word_dict) < 2 or not last_index:
                        # then use stop words
                        word_dict = get_slice_pos_dict(joint_event.text)
                        stop_word_set = stop_words_set_1 & \
                                        set(word_dict.keys())
                        last_index = find_split_index(
                            total_length=total_length,
                            stop_word_set=stop_word_set,
                            word_dict=word_dict,
                            min_range_ratio=0.1)
                        if not last_index:
                            stop_word_set = stop_words_set_2 & \
                                            set(word_dict.keys())
                            last_index = find_split_index(
                                total_length=total_length,
                                stop_word_set=stop_word_set,
                                word_dict=word_dict,
                                min_range_ratio=0.1)

                    if 0 < last_index < max_join_size:
                        if total_length - last_index < max_join_size:
                            # Both sides of the split position are below the
                            # size bound: this is the final split.
                            event_list.extend(
                                split_event(joint_event, last_index))
                            # From here on last_index is reused as a flag:
                            # -2 when the text just split is contained in the
                            #    previous event's text, so the next event is
                            #    kept for the following iteration;
                            # -1 otherwise, and the next event is skipped.
                            if joint_event.text in new_ssafile.events[-1].text:
                                last_index = -2
                            else:
                                last_index = -1
                            # The split pieces replace the previous event.
                            new_ssafile.events.pop()
                            if len(event_list) > 2:
                                # Single pass re-joining adjacent pieces
                                # whose joined text is below the size bound.
                                count = 0
                                while count < len(event_list) - 1:
                                    joint_event = join_event(
                                        event_list[count],
                                        event_list[count + 1])
                                    if len(joint_event.text) < max_join_size:
                                        del event_list[count + 1]
                                        event_list[count] = joint_event
                                        merge_count = merge_count + 1
                                    count = count + 1
                            new_ssafile.events.extend(event_list)
                            # Counts the resulting pieces, not the number of
                            # split operations.
                            split_count = split_count + len(event_list)
                            break
                        # The remainder is still too long: keep the head
                        # piece and go on splitting the remainder.
                        split_events = split_event(joint_event, last_index)
                        event_list.append(split_events[0])
                        joint_event = split_events[1]
                    else:
                        # No usable split position; leave the events alone.
                        break

                if last_index < 0:
                    if last_index > -2:
                        event_count = event_count + 1
                    continue

        new_ssafile.events.append(temp_ssafile.events[event_count])
        event_count = event_count + 1

    # Events of the other styles go in front of the processed ones.
    for events in sorted_events_list:
        new_ssafile.events = events + new_ssafile.events

    print(_("Merge {count} times.").format(count=merge_count))
    print(_("Split {count} times.").format(count=split_count))
    delta = len(subtitles.events) - len(new_ssafile.events)
    if delta > 0:
        print(_("Reduce {count} lines of events.").format(count=delta))
    else:
        print(_("Add {count} lines of events.").format(count=-delta))

    return new_ssafile
예제 #15
0
# Check whether any ass files were found
if len(subtitles_full_path) > 0:
    logger.debug('Found {ASS_FILES_COUNT} ass files, They are {ASS_FILES_LIST}'.format_map({
        'ASS_FILES_COUNT': len(subtitles_full_path),
        'ASS_FILES_LIST': str(subtitles_full_path),
    }))
else:
    logger.warning('Cannot find any ass files.')
    exit()

# Create an ass file for unparsed Dialogues.
# The idea of this file is: if this script is not able to parse
# the file for any reason and throws an exception, then
# this unparsed-Dialogues file should just store the original Dialogue with its style.
# This way we can keep all these broken rows to be investigated later.
unparsed_ass = pysubs2.SSAFile()

# For each subtitle file
for full_file_path in subtitles_full_path:

    # Logging the current file
    logger.debug('Working on file {FILE_NAME}'.format_map({'FILE_NAME': full_file_path}))

    try:
        # Load the subtitle file and parse it
        fl = pysubs2.load(full_file_path)
        logger.debug('Loaded the file successfully.')

        logger.debug('File "{FILE_NAME}" has "{STYLES_NUMBER}" styles, and there names are \n"{STYLES_LIST}"'.format_map({
            'FILE_NAME': full_file_path,
            'STYLES_NUMBER': len(fl.styles),
예제 #16
0
if record_raw:
    ensure_dir(current_directory + '/comment_log_raw')

raw_log_path = current_directory + '/comment_log_raw/' + chat_channel + '.txt'
log_path = current_directory + '/comment_log/' + chat_channel + '.txt'

subs_log_path = current_directory + '/comment_log/' + chat_channel + '.ass'

bot = irc_bot.irc_bot(username,
                      oauth,
                      chat_channel,
                      chat_server[0],
                      chat_server[1],
                      twitchclient_version=twitchclient_version)

subs = pysubs2.SSAFile()
i = 0

text = ''

while 1:
    raw_msg_list = bot.get_message()
    if len(raw_msg_list) > 0:
        if len(text) > 0:
            end = pysubs2.time.make_time(ms=datetime.now().microsecond)
            subs.insert(
                i,
                pysubs2.SSAEvent(start=start,
                                 end=end,
                                 text=text.replace('\\', '\\\\')))
            i = i + 1