Esempio n. 1
0
def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """Compare sort_and_reindex output with and without ``in_place=True``.

    Both modes must produce equal subtitles. Only the in-place call may hand
    back the very objects it was given; the default call must return new ones.
    """
    # Every call works on its own fresh copies, so neither run can influence
    # the other through shared Subtitle objects.
    def fresh_copies():
        return [srt.Subtitle(**vars(original)) for original in input_subs]

    not_in_place_subs = fresh_copies()
    in_place_subs = fresh_copies()

    # Record object identities before running the calls under test.
    nip_ids = {id(sub) for sub in not_in_place_subs}
    ip_ids = {id(sub) for sub in in_place_subs}

    copied_result = srt.sort_and_reindex(
        not_in_place_subs, start_index=start_index)
    not_in_place_output = list(copied_result)

    reused_result = srt.sort_and_reindex(
        in_place_subs, start_index=start_index, in_place=True)
    in_place_output = list(reused_result)

    # Same content regardless of mode
    subs_eq(not_in_place_output, in_place_output)

    # Default mode: none of the returned objects is one of the inputs
    assert_false(any(id(sub) in nip_ids for sub in not_in_place_output))

    # In-place mode: every returned object is one of the inputs
    assert_true(all(id(sub) in ip_ids for sub in in_place_output))
Esempio n. 2
0
def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """Check that in-place and copying sort_and_reindex agree on their output.

    The copying call has to return brand-new subtitle objects, while the
    in-place call has to return the objects it received.
    """
    # Independent copies per call keep the two runs from affecting each other
    not_in_place_subs = []
    in_place_subs = []
    for target in (not_in_place_subs, in_place_subs):
        target.extend(srt.Subtitle(**vars(sub)) for sub in input_subs)

    nip_ids = [id(sub) for sub in not_in_place_subs]
    ip_ids = [id(sub) for sub in in_place_subs]

    not_in_place_output = [
        *srt.sort_and_reindex(not_in_place_subs, start_index=start_index)
    ]
    in_place_output = [
        *srt.sort_and_reindex(
            in_place_subs, start_index=start_index, in_place=True)
    ]

    # Both modes must agree on the resulting subtitles
    subs_eq(not_in_place_output, in_place_output)

    # Copying mode must not leak any of the input objects into its output
    leaked_inputs = (id(sub) in nip_ids for sub in not_in_place_output)
    assert_false(any(leaked_inputs))

    # In-place mode must return only objects that were passed in
    reused_inputs = (id(sub) in ip_ids for sub in in_place_output)
    assert_true(all(reused_inputs))
Esempio n. 3
0
    def __init__(
            self,
            id="",
            f=None,  # one or many (list) file
            comment='##',
            set_id_as_prog=True,
            debug=False):
        """Load one or more SRT files into ``self.subs``.

        Args:
            id: Identifier stored on the instance as ``self.id``.
            f: A single path (str) or a list of paths. Files are read in
                sorted path order and their lines concatenated.
            comment: Lines starting with this prefix are dropped before
                parsing.
            set_id_as_prog: When true, the parsed subtitles are passed
                through ``srt.sort_and_reindex``.
            debug: When true, the filtered raw lines are kept in ``self.raw``.

        Raises:
            ValueError: If ``f`` is neither a str nor a list.
        """
        if isinstance(f, str):
            paths = [f]
        elif isinstance(f, list):
            paths = f
        else:
            raise ValueError("f must be a str path or a list of path")

        # id of the srt
        self.id = id

        # Collect every non-comment line from all files
        kept_lines = []
        for path in sorted(paths):
            with open(path) as handle:
                kept_lines.extend(
                    line for line in handle if not line.startswith(comment))
        if debug:
            self.raw = kept_lines

        # Parse the concatenated text, optionally sorting and reindexing
        parsed = list(srt.parse("".join(kept_lines)))
        if set_id_as_prog:
            parsed = list(srt.sort_and_reindex(parsed))

        self.subs = parsed
Esempio n. 4
0
def merge(file_output, file_inputs):
  """Merge all subtitle files in ``file_inputs`` and write them to ``file_output``.

  Every input is read with ``read_srt``, the results are folded together
  left to right with ``merge_two_srt_lists``, and the merged subtitles are
  passed through ``srt.sort_and_reindex`` before being handed to
  ``write_srt``. Progress is reported on stdout.
  """
  print('begin merging {} into {}'.format(file_inputs, file_output))
  parsed_inputs = [read_srt(path) for path in file_inputs]
  # Start from the first file and fold every remaining one into it
  combined, remaining = parsed_inputs[0], parsed_inputs[1:]
  for extra in remaining:
    combined = merge_two_srt_lists(combined, extra)
  reindexed = srt.sort_and_reindex(combined)
  write_srt(file_output, reindexed)
  print('end merging {} into {}'.format(file_inputs, file_output))
Esempio n. 5
0
def test_sort_and_reindex_same_start_time_uses_end(input_subs):
    """With identical start times, sort_and_reindex must order by end time."""
    # Give every subtitle the same start so only the end time can decide
    # the ordering.
    shared_start = timedelta(1)
    for subtitle in input_subs:
        subtitle.start = shared_start

    reindexed_subs = [*srt.sort_and_reindex(input_subs, in_place=True)]

    def end_time(subtitle):
        return subtitle.end

    # Expected order: ascending end time
    expected_sorting = sorted(input_subs, key=end_time)
    eq(reindexed_subs, expected_sorting)
Esempio n. 6
0
def test_subs_starts_before_zero_removed(positive_subs, negative_subs, negative_td):
    """Subtitles with a negative start must not survive sort_and_reindex."""
    for negative_sub in negative_subs:
        # end gets the same value just to avoid tripping any start >= end
        # errors
        negative_sub.start = negative_sub.end = negative_td

    mixed_subs = positive_subs + negative_subs
    reindexed = srt.sort_and_reindex(mixed_subs, in_place=True)
    composed_subs = list(reindexed)

    # Only the positive subtitles may remain, in any order
    subs_eq(composed_subs, positive_subs, any_order=True)
Esempio n. 7
0
def test_sort_and_reindex_same_start_time_uses_end(input_subs):
    """Ties on start time must be broken by end time in sort_and_reindex."""
    # Same start everywhere, so the comparison falls through to the end time
    pinned_start = timedelta(1)
    for entry in input_subs:
        entry.start = pinned_start

    reindexed = srt.sort_and_reindex(input_subs, in_place=True)
    reindexed_subs = list(reindexed)

    # The result has to match a plain sort on end time
    expected_sorting = sorted(input_subs, key=lambda entry: entry.end)
    assert reindexed_subs == expected_sorting
Esempio n. 8
0
def test_subs_starts_before_zero_removed(positive_subs, negative_subs, negative_td):
    """sort_and_reindex must drop every subtitle that starts before zero."""
    # Move all "negative" subtitles to the negative timestamp; end is set too
    # just to avoid tripping any start >= end errors.
    for doomed in negative_subs:
        doomed.start = negative_td
        doomed.end = negative_td

    everything = [*positive_subs, *negative_subs]
    composed_subs = [*srt.sort_and_reindex(everything, in_place=True)]

    # Nothing with a negative start may be left over
    subs_eq(composed_subs, positive_subs, any_order=True)
Esempio n. 9
0
def test_sort_and_reindex_no_skip(input_subs):
    """With ``skip=False``, sort_and_reindex must yield every subtitle."""
    # Swap start and end on each subtitle; with skip=False this must not
    # cause any of them to be dropped.
    for sub in input_subs:
        sub.start, sub.end = sub.end, sub.start

    reindexed = srt.sort_and_reindex(input_subs, skip=False)
    reindexed_subs = list(reindexed)

    # Same number out as in: nothing was skipped
    assert len(reindexed_subs) == len(input_subs)
Esempio n. 10
0
def _find_subtitle_file(language):
    """Return the first ``./*.<language>.srt`` path, exiting if none exists."""
    matches = glob.glob('./*.' + language + '.srt')
    if not matches:
        sys.exit("No subtitle file matching './*.{0}.srt' found".format(
            language))
    return matches[0]


def _read_subtitles(path):
    """Parse the .srt file at *path* into a list, ignoring undecodable bytes."""
    with open(path, 'r', errors='ignore') as subtitle_file:
        return list(srt.parse(subtitle_file.read()))


def main(argv):
    """Merge a primary and a secondary subtitle file into ``*.merged.srt``.

    The files are looked up in the current directory as
    ``./*.<language>.srt``. Primary subtitles are wrapped in a yellow
    ``<font>`` tag, secondary subtitles are prefixed with ``{\\an8}``, and
    the combined list is sorted and reindexed before it is written next to
    the primary file.

    Args:
        argv: Command-line arguments without the program name. Supports
            ``-p/--primary-language``, ``-s/--secondary-language`` and
            ``-h/--help``.

    Exits with status 2 on invalid or missing options, and with an error
    message when no matching subtitle file is found.
    """
    usage = ('merge_subtitles.py -p <primary_language>'
             ' -s <secondary_language>')

    # Parse arguments
    primary_language = ''
    secondary_language = ''
    try:
        opts, _ = getopt.getopt(
            argv, "hp:s:", ["primary-language=", "secondary-language="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(usage)
            sys.exit()
        elif opt in ("-p", "--primary-language"):
            primary_language = arg
        elif opt in ("-s", "--secondary-language"):
            secondary_language = arg

    # Both languages are required to build the file patterns below
    if not primary_language or not secondary_language:
        print(usage)
        sys.exit(2)

    # Read files and convert to list
    primary_path = _find_subtitle_file(primary_language)
    secondary_path = _find_subtitle_file(secondary_language)
    subtitles_primary = _read_subtitles(primary_path)
    subtitles_secondary = _read_subtitles(secondary_path)

    # Make primary yellow
    for s in subtitles_primary:
        s.content = '<font color="#ffff54">' + s.content + '</font>'

    # Place secondary on top
    for s in subtitles_secondary:
        s.content = '{\\an8}' + s.content

    # Merge
    subtitles_merged = subtitles_primary + subtitles_secondary
    subtitles_merged = list(srt.sort_and_reindex(subtitles_merged))

    # Write merged to file. Only the trailing ".<language>.srt" is swapped so
    # that a language code occurring elsewhere in the path (e.g. "en" in
    # "Avengers") is left untouched.
    language_suffix = '.' + primary_language + '.srt'
    merged_path = primary_path[:-len(language_suffix)] + '.merged.srt'
    merged_text = srt.compose(subtitles_merged)
    with open(merged_path, 'w') as merged_file:
        merged_file.write(merged_text)
Esempio n. 11
0
def test_subs_missing_content_removed(content_subs, contentless_subs,
                                      contentless_text):
    """Content-less subtitles must be dropped and the rest reindexed from 1."""
    for empty_sub in contentless_subs:
        empty_sub.content = contentless_text

    mixed_subs = contentless_subs + content_subs
    reindexed = srt.sort_and_reindex(mixed_subs, in_place=True)
    composed_subs = list(reindexed)

    # Only the subtitles that carry content may come back; every
    # contentless one has to be stripped.
    subs_eq(composed_subs, content_subs, any_order=True)

    # Indexes run consecutively from the default start index, not counting
    # the stripped subtitles.
    default_start_index = 1
    actual_indexes = [sub.index for sub in composed_subs]
    expected_indexes = list(
        range(default_start_index, default_start_index + len(composed_subs)))
    assert actual_indexes == expected_indexes
Esempio n. 12
0
def main():
    """Interactively merge several .srt files into one sorted, reindexed file.

    Prompts for the number of files, then for each input path, and finally
    for the output path. All subtitles are concatenated, passed through
    ``srt.sort_and_reindex`` and written out with ``srt.compose``.
    """
    n = int(input('Amount of .srt files: '))
    subtitlesmerged = []
    # Ask for files 1..n (iterating over the tuple (1, n) would only ever
    # prompt twice, for "1" and "n").
    for i in range(1, n + 1):
        p = input('File no. %s: ' % i)
        with open(p, 'r') as file:
            data = file.read()
        # Strings are immutable: keep the result so the BOM is really removed
        data = data.replace(u'\ufeff', '')
        subtitlesmerged.extend(srt.parse(data))

    subtitles = list(srt.sort_and_reindex(subtitlesmerged))

    subtitles_composed = srt.compose(subtitles)
    outputpath = input('Write merged .srt file to: ')
    with open(outputpath, 'w') as output:
        output.write(subtitles_composed)
Esempio n. 13
0
def test_subs_missing_content_removed(content_subs, contentless_subs, contentless_text):
    """sort_and_reindex must strip subtitles whose content is ``contentless_text``."""
    # Blank out the content of every subtitle that is meant to be removed
    for blank in contentless_subs:
        blank.content = contentless_text

    candidates = [*contentless_subs, *content_subs]
    composed_subs = [*srt.sort_and_reindex(candidates, in_place=True)]

    # What remains must be exactly the subtitles that have content
    subs_eq(composed_subs, content_subs, any_order=True)

    # The survivors are numbered consecutively from the default start index;
    # stripped subtitles do not consume an index.
    default_start_index = 1
    last_index = default_start_index + len(composed_subs)
    eq(
        [sub.index for sub in composed_subs],
        list(range(default_start_index, last_index)),
    )
Esempio n. 14
0
def test_sort_and_reindex(input_subs, start_index):
    """sort_and_reindex must order by start time and number from start_index."""
    # One shared end time for all subtitles, so ordering is decided by the
    # start time alone. The value is chosen with start_timestamp_strategy in
    # mind.
    # NOTE(review): the earlier comment said the end "must be guaranteed to
    # be < sub.start", which reads inverted for a value this large - confirm.
    shared_end = timedelta(500001)
    for subtitle in input_subs:
        subtitle.end = shared_end

    reindexed = srt.sort_and_reindex(
        input_subs, start_index=start_index, in_place=True)
    reindexed_subs = list(reindexed)

    # Indexes must run consecutively from start_index
    actual_indexes = [sub.index for sub in reindexed_subs]
    expected_indexes = list(range(start_index, start_index + len(input_subs)))
    assert actual_indexes == expected_indexes

    # Order must match a plain sort on start time
    def start_time(subtitle):
        return subtitle.start

    expected_sorting = sorted(input_subs, key=start_time)
    assert reindexed_subs == expected_sorting
Esempio n. 15
0
def test_sort_and_reindex(input_subs, start_index):
    """Check the ordering and numbering produced by sort_and_reindex."""
    # All subtitles share one end time, leaving the start time as the only
    # thing that can differ when sorting.
    common_end = timedelta(1)
    for entry in input_subs:
        entry.end = common_end

    call_options = {"start_index": start_index, "in_place": True}
    reindexed_subs = list(srt.sort_and_reindex(input_subs, **call_options))

    # Numbering starts at start_index and has no gaps
    stop_index = start_index + len(input_subs)
    eq([sub.index for sub in reindexed_subs],
       list(range(start_index, stop_index)))

    # Subtitles come back in ascending start-time order
    expected_sorting = sorted(input_subs, key=lambda entry: entry.start)
    eq(reindexed_subs, expected_sorting)
Esempio n. 16
0
def test_sort_and_reindex(input_subs, start_index):
    """Verify that sort_and_reindex sorts by start and reindexes from start_index."""
    # Fix the end time to one value for every subtitle so that only the start
    # time takes part in the comparison; see how start_timestamp_strategy is
    # defined for the bound this value relies on.
    # NOTE(review): the previous wording claimed the end "must be guaranteed
    # to be < sub.start"; that looks reversed for such a large value - verify.
    pinned_end = timedelta(500001)
    for sub in input_subs:
        sub.end = pinned_end

    reindexed_subs = [
        *srt.sort_and_reindex(input_subs, start_index=start_index, in_place=True)
    ]

    # The indexes must be start_index, start_index + 1, ...
    found_indexes = [sub.index for sub in reindexed_subs]
    wanted_indexes = list(range(start_index, start_index + len(input_subs)))
    eq(found_indexes, wanted_indexes)

    # The order must follow the start time
    def by_start(sub):
        return sub.start

    eq(reindexed_subs, sorted(input_subs, key=by_start))
Esempio n. 17
0
def test_sort_and_reindex(input_subs, start_index):
    """sort_and_reindex must return subtitles sorted by start, indexed from start_index."""
    # Identical end times everywhere: the sort can only depend on the start
    same_end = timedelta(1)
    for item in input_subs:
        item.end = same_end

    result = srt.sort_and_reindex(
        input_subs,
        start_index=start_index,
        in_place=True,
    )
    reindexed_subs = list(result)

    # Consecutive indexes beginning at start_index
    index_values = [sub.index for sub in reindexed_subs]
    index_target = list(range(start_index, start_index + len(input_subs)))
    eq(index_values, index_target)

    # Ascending start time
    expected_sorting = sorted(input_subs, key=lambda item: item.start)
    eq(reindexed_subs, expected_sorting)
def preprocess_subs(contents):
    """Parse SRT text and return its subtitles as a sorted, reindexed list."""
    parsed = srt.parse(contents)
    reindexed = srt.sort_and_reindex(parsed)
    return list(reindexed)
Esempio n. 19
0
# Translate each chunk in sub_n and keep only the translated text
sub_n_translated = []
translator = Translator()

for s in sub_n:
    s_translated = translator.translate(s,
                                        src=original_language,
                                        dest=language_to_translate)
    sub_n_translated.append(s_translated.text)
    print('.')  # progress marker, one per translated chunk

# Flatten the translated chunks into one list of lines
sub_translated = []

for s in sub_n_translated:
    sub_translated.extend(s.split('\n'))

sub = srt.parse(subtitle)

new_subititle = []

# Pair the i-th parsed subtitle with the i-th translated line.
# NOTE(review): this raises IndexError if the translation yields fewer lines
# than there are subtitles - confirm how sub_n is built.
for i, s in enumerate(sub):
    s.content = sub_translated[i]
    new_subititle.append(s)

new_subititle = list(srt.sort_and_reindex(new_subititle))

# The context manager closes the file even if composing or writing fails
with open(srt_translated, 'w') as file:
    file.write(srt.compose(new_subititle))

print(srt_translated)
Esempio n. 20
0
 def transform(self, *args):
     """Run ``self.function`` on ``self.srt_list`` and store the reindexed result.

     Any extra positional arguments are forwarded to ``self.function`` after
     the subtitle list. The returned subtitles are then passed through
     ``srt.sort_and_reindex`` and stored back as a list.
     """
     self.srt_list = self.function(self.srt_list, *args)
     reindexed = srt.sort_and_reindex(self.srt_list)
     self.srt_list = list(reindexed)
Esempio n. 21
0
    # Split the input into audio segments stored under the temp directory
    files = extractAudio(args.input,
                         args.temp_dir,
                         smoothing_window=args.silence_window,
                         weight=args.silence_weight)

    # Init the stt
    stt = STTPipeline(args.model_dir)

    # Start transcribing
    print("Transcribing...")

    subs = []
    for w_file in tqdm(files):
        start, end, transcription = process_audio(w_file, stt)
        # Skip segments whose transcription is empty or only whitespace
        if not transcription.strip():
            continue

        # Index 0 is a placeholder; real indexes are assigned when reindexing
        subs.append(
            srt.Subtitle(0, datetime.timedelta(seconds=float(start)),
                         datetime.timedelta(seconds=float(end)),
                         transcription))

    # write output
    print(f"Writing subtitle file to {args.output}...")
    # sort_and_reindex returns a lazy iterator, so it has to be consumed for
    # the sorting and reindexing to actually happen.
    subs = list(srt.sort_and_reindex(subs, in_place=True))
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(srt.compose(subs))

    print("Removing temporary files...")
    shutil.rmtree(args.temp_dir)
Esempio n. 22
0
def parse_srt(settings, file, summary, dry_run, quiet, verbose):
    """Apply the rules in ``settings["rules"]`` to one SRT file and save it.

    Every subtitle is copied and run through the rules in order. A rule is a
    dict with ``name``, ``type`` ("regex" or "string"), ``action``
    ("replace" or "delete"), ``pattern``, ``value`` (for replacements) and an
    optional ``only_if_match`` glob that ``file`` must match for the rule to
    apply. A "delete" rule removes the subtitle when the pattern is found in
    its content. Subtitles whose content ends up empty are dropped as well.

    Args:
        settings: Mapping holding the list of rules under ``"rules"``.
        file: Path of the SRT file; it is overwritten when changes are saved.
        summary: Print a one-line summary of modified/removed counts.
        dry_run: Report what would change without writing the file.
        quiet: Suppress most informational output.
        verbose: Print every modified or removed subtitle.

    Returns:
        False if the file could not be read or parsed, True otherwise.
    """
    if dry_run or verbose:
        print("Parsing '{0}'...".format(file))

    try:
        with open(file, "r", encoding="utf-8") as filehandler:
            original_subtitles = filehandler.read()
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError for files that are not UTF-8
        print()
        print("Couldn't open file '{0}'".format(file))
        return False

    try:
        original_subtitles = list(srt.parse(original_subtitles))
    except Exception:
        # Best effort per file: report and let the caller continue
        print()
        print("Trouble parsing subtitles in '{0}'".format(file))
        return False

    new_subtitle_file = []

    removed_line_count = 0
    modified_line_count = 0

    for i, original in enumerate(original_subtitles):
        original_subtitle_text = original.content
        new_subtitle = srt.Subtitle(
            i,
            start=original.start,
            end=original.end,
            content=original.content,
            # Each copy keeps its own proprietary data
            proprietary=original.proprietary,
        )

        # Names of the rules that changed or removed this subtitle
        line_history = []

        for rule in settings["rules"]:
            if new_subtitle is None:
                break

            if "only_if_match" in rule and not fnmatch.fnmatch(
                    file, rule["only_if_match"]):
                continue

            line_before_rule_run = new_subtitle.content

            if rule["type"] == "regex":
                if rule["action"] == "replace":
                    # flags must be passed by keyword: the fourth positional
                    # argument of re.sub is ``count``
                    new_subtitle.content = re.sub(
                        rule["pattern"],
                        rule["value"],
                        new_subtitle.content,
                        flags=re.MULTILINE,
                    )
                elif rule["action"] == "delete":
                    if re.search(rule["pattern"], new_subtitle.content,
                                 flags=re.MULTILINE):
                        new_subtitle = None
            elif rule["type"] == "string":
                if rule["action"] == "replace":
                    # str.replace returns a new string; store it back
                    new_subtitle.content = new_subtitle.content.replace(
                        rule["pattern"], rule["value"])
                elif rule["action"] == "delete":
                    # Delete when the pattern IS present, mirroring the regex
                    # delete rule
                    if rule["pattern"] in new_subtitle.content:
                        new_subtitle = None

            if (new_subtitle is None
                    or new_subtitle.content != line_before_rule_run):
                line_history.append(rule["name"])

        if new_subtitle is not None:
            if new_subtitle.content != "":
                new_subtitle_file.append(new_subtitle)
            if new_subtitle.content != original_subtitle_text:
                modified_line_count += 1
                if verbose and not quiet:
                    print()
                    print("{0}".format(
                        wrap_sub(original_subtitle_text, "-")))
                    print("{0}".format(wrap_sub(new_subtitle.content, "+")))
                    print("|By rule(s): {0}".format(", ".join(
                        map(str, line_history))))
        else:
            removed_line_count += 1
            if verbose and not quiet:
                print()
                print("{0}".format(wrap_sub(original_subtitle_text, "-")))
                print("|By rule: {0}".format(line_history[-1]))

    if not dry_run:
        new_subtitle_file = list(srt.sort_and_reindex(new_subtitle_file))
        if (modified_line_count != 0 or removed_line_count != 0
                or new_subtitle_file != original_subtitles):
            print()
            if modified_line_count == 0 and removed_line_count == 0 and not quiet:
                print(
                    "Only changes to sorting and indexing found; No changes to subtitles detected."
                )
            if not quiet or verbose:
                print("Saving subtitle file {0}...".format(file))
                print()
            with open(file, "w", encoding="utf-8") as filehandler:
                filehandler.write(srt.compose(new_subtitle_file))
        else:
            if not quiet or verbose:
                print("No changes to save")
                print()

    if summary or verbose:
        if dry_run:
            if verbose:
                print()
            print(
                "Summary: {0} Lines to be modified; {1} Lines to be removed; '{2}'"
                .format(modified_line_count, removed_line_count, file))
        else:
            print(
                "Summary: {0} Lines modified; {1} Lines removed; '{2}'".format(
                    modified_line_count, removed_line_count, file))
        print()

    return True
Esempio n. 23
0
def merge_subs(all_file_list):
    """Merge the ``.srt`` files in ``all_file_list`` and write ``merge.srt``.

    The subtitles of the first ``.srt`` file seed the ``mergesub`` list. For
    every further ``.srt`` file the nested ``langfixed`` helper compares time
    ranges and either appends the other subtitle's text (on a new line) to an
    overlapping entry of ``mergesub`` or inserts it as a new entry. The result
    is passed through ``srt.sort_and_reindex`` and written to ``merge.srt`` in
    the current working directory.

    Files whose extension is not ``.srt`` are ignored.
    """
    mergesub = list()
    filecount = 0
    for file_name in all_file_list:
        ftype = os.path.splitext(file_name)[1]
        if ftype == r".srt":
            filecount = filecount + 1
            with open(file_name,"r",encoding="utf-8") as fileread:
                # The open file object itself is handed to srt.parse
                mergesub_list = list(srt.parse(fileread,ignore_errors=False))
                if filecount == 1:
                    # First file: copy its subtitles as the merge base
                    for i in range (len(mergesub_list)):
                        mergesub.append(mergesub_list[i])

                    # langfixed is only defined here, while handling the first
                    # file. It reads ``mergesub`` and ``mergesub_list`` from the
                    # enclosing scope, so later calls see the list of the file
                    # being processed at that time.
                    # NOTE(review): timings come from subtitle_1/subtitle_2,
                    # but text is always read from mergesub_list[i] and written
                    # to mergesub[j], whichever order the lists were passed in.
                    # For the langfixed(mergesub_list, mergesub) call below the
                    # indexes look swapped - confirm.
                    # NOTE(review): a subtitle_2[i] matching none of the
                    # branches for any j is silently left out.
                    def langfixed(subtitle_1,subtitle_2):
                        for i in range (len(subtitle_2)):
                            for j in range (len(subtitle_1)):
                                # [startflag, endflag] is the candidate slot,
                                # [start, end] the subtitle to be placed
                                startflag = subtitle_1[j].start
                                endflag = subtitle_1[j].end
                                start = subtitle_2[i].start
                                end = subtitle_2[i].end
                                
                                if j == 0:
                                    # NOTE(review): this branch indexes
                                    # mergesub[j+1] and mergesub[j+2] without
                                    # the length checks used in the else branch;
                                    # short lists can raise IndexError.
                                    if start <= startflag and startflag < end < endflag:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if start <= startflag and endflag <= end:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if startflag < start < endflag and endflag <= end < mergesub[j+1].start:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if startflag < start < endflag and endflag <= end:
                                        if end - mergesub[j+2].start > timedelta(microseconds=0):
                                            mergesub[j+1].content = mergesub[j+1].content + "\n" +  mergesub_list[i].content
                                            break
                                        # Attach to whichever neighbour overlaps more
                                        if endflag - start > end - mergesub[j+1].start:
                                            mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                            break
                                        else:
                                            mergesub[j+1].content = mergesub[j+1].content + "\n" +  mergesub_list[i].content
                                            break
                                        # NOTE(review): unreachable - both arms
                                        # of the if/else above break.
                                        if endflag - start == end - mergesub[j+1].start:
                                            mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                            break
                                    if startflag <= start and end <= endflag:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if start < startflag and end < startflag:
                                        # Entirely before this slot: insert as a
                                        # separate entry (mutates mergesub while
                                        # it is being indexed)
                                        mergesub.insert(j,mergesub_list[i])
                                        break

                                else:
                                    if mergesub[j-1].end < start <= startflag and startflag < end < endflag:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if start <= startflag and endflag <= end:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    # NOTE(review): mergesub[j+1] is read here
                                    # before the j+1 bounds check further down.
                                    if startflag < start < endflag and endflag <= end < mergesub[j+1].start:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if startflag < start < endflag and endflag <= end:
                                        if j+2 < len(mergesub):
                                            if end - mergesub[j+2].start > timedelta(microseconds=0):
                                                mergesub[j+1].content = mergesub[j+1].content + "\n" +  mergesub_list[i].content
                                                break
                                        if j+1 < len(mergesub):
                                            # Attach to whichever neighbour overlaps more
                                            if endflag - start > end - mergesub[j+1].start:
                                                mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                                break
                                            else:
                                                mergesub[j+1].content = mergesub[j+1].content + "\n" +  mergesub_list[i].content
                                                break
                                            # NOTE(review): unreachable - both
                                            # arms of the if/else above break.
                                            if endflag - start == end - mergesub[j+1].start:
                                                mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                                break
                                    if startflag <= start and end <= endflag:
                                        mergesub[j].content = mergesub[j].content + "\n" +  mergesub_list[i].content
                                        break
                                    if start < startflag and end < startflag:
                                        mergesub.insert(j,mergesub_list[i])
                                        break
                else:
                    # Later files: the shorter list is passed as subtitle_2
                    # (the outer loop in langfixed).
                    # NOTE(review): with equal lengths this takes the second
                    # call - confirm that is intended.
                    if len(mergesub_list) > len(mergesub):
                        langfixed(mergesub,mergesub_list)

                    else:
                        langfixed(mergesub_list,mergesub)
                        
    # Sort and renumber the merged subtitles before writing them out
    merge = list(srt.sort_and_reindex(mergesub))
    
    total_file = open("merge.srt","w",encoding="utf-8")
    # srt.compose returns one string; writelines writes it unchanged
    total_file.writelines(srt.compose(merge))
    total_file.close()