コード例 #1
0
ファイル: lrpi_play.py プロジェクト: LUSHDigital/lrpi_record
def tick():
    """Poll the media player clock and fire the light cue for the current subtitle."""
    global subs
    global player
    global last_played
    global TICK_TIME, DEBUG

    # Wall-clock reference (debug display only).
    t = perf_counter()
    ts = SubRipTime(seconds=t)
    tsd = SubRipTime(seconds=t + 1 * TICK_TIME)

    # Player-reported position: pt..ptd is a one-tick-wide search window.
    pp = player.get_position()
    ptms = player.get_time() / 1000.0
    pt = SubRipTime(seconds=(player.get_time() / 1000.0))
    ptd = SubRipTime(seconds=(player.get_time() / 1000.0 + 1 * TICK_TIME))

    if DEBUG:
        print('Time: %s | %s | %s - %s | %s - %s | %s | %s' % (datetime.now(),t,ts,tsd,pt,ptd,pp,ptms))
        print('Finding subtitle starting at %s and ending at %s' % (pt, ptd))

    # Resume the search from the last cue we already fired.
    sub, i = find_subtitle(subs, pt, ptd, lo=last_played)

    if DEBUG:
        print('Result of find_subtitle: ', i)

    if sub != "":
        print("Light event:", i, sub)
        trigger_light(sub)
        last_played = i
コード例 #2
0
    def generate_srt_from_sjson(sjson_subs):
        """
        Generate transcripts from sjson to SubRip (*.srt)

        Arguments:
            sjson_subs (dict): `sjson` subs with parallel 'start', 'end'
                and 'text' sequences.

        Returns:
            Subtitles in SRT format, or '' when the sequences disagree
            in length.
        """
        starts = sjson_subs['start']
        ends = sjson_subs['end']
        texts = sjson_subs['text']

        # Refuse malformed input where the parallel lists differ in length.
        if not (len(starts) == len(ends) == len(texts)):
            return ''

        fragments = []
        for idx, (begin, finish, caption) in enumerate(zip(starts, ends, texts)):
            entry = SubRipItem(
                index=idx,
                start=SubRipTime(milliseconds=begin),
                end=SubRipTime(milliseconds=finish),
                text=caption,
            )
            fragments.append(str(entry))
            fragments.append('\n')
        return ''.join(fragments)
コード例 #3
0
def tick():
    """Poll the player position and trigger a hue light event for any new subtitle."""
    global subs
    global player
    global last_played
    global TICK_TIME

    # Wall-clock reference (display only).
    t = perf_counter()
    ts = SubRipTime(seconds=t)
    tsd = SubRipTime(seconds=t + 1 * TICK_TIME)

    # Player-reported position: pt..ptd is a one-tick-wide search window.
    pp = player.get_position()
    ptms = player.get_time() / 1000.0
    pt = SubRipTime(seconds=(player.get_time() / 1000.0))
    ptd = SubRipTime(seconds=(player.get_time() / 1000.0 + 1 * TICK_TIME))
    print('Time: %s | %s | %s - %s | %s - %s | %s | %s' % (datetime.now(),t,ts,tsd,pt,ptd,pp,ptms))

    sub, i = find_subtitle(subs, pt, ptd)
    print("Subtitle:", sub, i)

    # Only fire each cue once: require a strictly newer index.
    if sub != "" and i > last_played:
        trigger_light_hue(sub)
        last_played = i
コード例 #4
0
    def triggerPreviousEvent(self, pos):
        """Search backwards from *pos* (seconds) and re-fire the most recent light cue.

        Used after a seek so the lighting state matches the new playback
        position.  Updates ``self.last_played`` with the cue index found.
        """
        if LIGHTING_MSGS:
            print("Finding last lighting command from pos: ", pos)

        pp = pos
        pt = SubRipTime(seconds=pp)
        ptd = SubRipTime(seconds=(pp + 1 * TICK_TIME))

        if VERBOSE and DEBUG:
            print("Finding last light event, starting from: ")
            # BUG FIX: this debug line previously printed `ptd` under the
            # "pt:" label, making the two lines identical and misleading.
            print("pt: ", pt)
            print("ptd: ", ptd)

        # backwards=True: nearest cue at or before the window, not after it.
        sub, i = self.find_subtitle(self.subs, pt, ptd, backwards=True)

        if LIGHTING_MSGS:
            print("Seeking, found sub:", sub, " at pos: ", i)

        if sub != "":  #and i > self.last_played:
            if LIGHTING_MSGS and DEBUG:
                print(i, "Found last lighting event!:", sub)
            self.trigger_light(sub)
            self.last_played = i
            if DEBUG:
                print('last_played: ', i)
コード例 #5
0
def generate_srt_from_sjson(sjson_subs, speed):
    """Generate transcripts with speed = 1.0 from sjson to SubRip (*.srt).

    :param sjson_subs: "sjson" subs.
    :param speed: speed of `sjson_subs`.
    :returns: "srt" subs ('' when the parallel lists disagree in length).
    """
    # Refuse malformed input where start/end/text lengths differ.
    if not (len(sjson_subs['start']) == len(sjson_subs['end'])
            == len(sjson_subs['text'])):
        return ''

    # Re-time the subs back to 1.0x before rendering.
    sjson_speed_1 = generate_subs(speed, 1, sjson_subs)

    rendered = []
    for idx in range(len(sjson_speed_1['start'])):
        entry = SubRipItem(
            index=idx,
            start=SubRipTime(milliseconds=sjson_speed_1['start'][idx]),
            end=SubRipTime(milliseconds=sjson_speed_1['end'][idx]),
            text=sjson_speed_1['text'][idx])
        # `unicode` (Python 2 text type) kept, matching the rest of this module.
        rendered.append(unicode(entry))
        rendered.append('\n')
    return ''.join(rendered)
コード例 #6
0
ファイル: predictor.py プロジェクト: seainm/subaligner
 def __on_frame_timecodes(self, subs: List[SubRipItem]) -> None:
     """Snap subtitle start/end times onto the frame grid, in place.

     Only the millisecond component (the ",mmm" tail of the timecode
     string) is re-quantised: starts are rounded UP to the next frame
     boundary and ends are rounded DOWN, so each cue shrinks rather
     than straddling a frame boundary.

     NOTE(review): assumes the rounded millisecond value stays below
     1000 — e.g. 999 rounded up would yield an out-of-range ",1000"
     component before `zfill`; confirm `step_sample` makes this
     impossible.
     """
     for sub in subs:
         # Frame duration in milliseconds (step_sample presumably in
         # seconds, given the *1000 scaling — confirm).
         millis_per_frame = self.__feature_embedder.step_sample * 1000
         # Round the ms part UP to a frame boundary (+0.5 before round()).
         new_start_millis = round(int(str(sub.start).split(",")[1]) / millis_per_frame + 0.5) * millis_per_frame
         new_start = str(sub.start).split(",")[0] + "," + str(int(new_start_millis)).zfill(3)
         # Round the ms part DOWN to a frame boundary (-0.5 before round()).
         new_end_millis = round(int(str(sub.end).split(",")[1]) / millis_per_frame - 0.5) * millis_per_frame
         new_end = str(sub.end).split(",")[0] + "," + str(int(new_end_millis)).zfill(3)
         sub.start = SubRipTime.coerce(new_start)
         sub.end = SubRipTime.coerce(new_end)
コード例 #7
0
def merge_srt(chn_file, eng_file, output_file):
    """Merge a Chinese and an English .srt file into one bilingual SRT file."""
    # Intervals shorter than this are not worth emitting as separate cues.
    tolerance = SubRipTime(milliseconds=500)
    chinese = SubRipFile.open(chn_file)
    english = SubRipFile.open(eng_file)
    merged = merge_subtitle(chinese, english, tolerance)
    # Overwrite any previous output file.
    if os.path.isfile(output_file):
        os.remove(output_file)
    merged.save(output_file, encoding='utf8')
コード例 #8
0
 def offset(self):
     """Return ``self.media.offset`` (a ``datetime.timedelta``) as a SubRipTime.

     BUG FIX: the previous version read only ``d.seconds``, which silently
     drops the ``days`` component of the timedelta, so offsets of a day or
     more were truncated.  Days are now folded into the hour count.
     """
     d = self.media.offset
     total_seconds = d.days * 86400 + d.seconds  # fold whole days back in
     hours, remainder = divmod(total_seconds, 3600)
     minutes, seconds = divmod(remainder, 60)
     return SubRipTime(hours=hours,
                       minutes=minutes,
                       seconds=seconds,
                       milliseconds=d.microseconds / 1000)
コード例 #9
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_from_time(self):
     """SubRipTime compares against datetime.time, directly and via to_time()."""
     time_obj = time(1, 2, 3, 4000)
     equal = SubRipTime(1, 2, 3, 4)
     self.assertEqual(equal, time_obj)
     self.assertEqual(equal.to_time(), time_obj)
     greater = SubRipTime(1, 2, 3, 5)
     self.assertTrue(greater >= time_obj)
     self.assertTrue(greater.to_time() >= time_obj)
     lesser = SubRipTime(1, 2, 3, 3)
     self.assertTrue(lesser <= time_obj)
     self.assertTrue(lesser.to_time() <= time_obj)
     different = SubRipTime(1, 2, 3, 0)
     self.assertTrue(different != time_obj)
     self.assertTrue(different.to_time() != time_obj)
コード例 #10
0
ファイル: test_srttime.py プロジェクト: GbalsaC/bitnamiP
class TestSimpleTime(unittest.TestCase):
    """Component get/set behaviour of a default-constructed SubRipTime.

    Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
    which was removed in Python 3.12.
    """

    def setUp(self):
        self.time = SubRipTime()

    def test_default_value(self):
        self.assertEqual(self.time.ordinal, 0)

    def test_micro_seconds(self):
        self.time.milliseconds = 1
        self.assertEqual(self.time.milliseconds, 1)
        self.time.hours += 42
        self.assertEqual(self.time.milliseconds, 1)
        # Overflow carries into the next-larger unit.
        self.time.milliseconds += 1000
        self.assertEqual(self.time.seconds, 1)

    def test_seconds(self):
        self.time.seconds = 1
        self.assertEqual(self.time.seconds, 1)
        self.time.hours += 42
        self.assertEqual(self.time.seconds, 1)
        self.time.seconds += 60
        self.assertEqual(self.time.minutes, 1)

    def test_minutes(self):
        self.time.minutes = 1
        self.assertEqual(self.time.minutes, 1)
        self.time.hours += 42
        self.assertEqual(self.time.minutes, 1)
        self.time.minutes += 60
        self.assertEqual(self.time.hours, 43)

    def test_hours(self):
        self.time.hours = 1
        self.assertEqual(self.time.hours, 1)
        self.time.minutes += 42
        self.assertEqual(self.time.hours, 1)

    def test_shifting(self):
        self.time.shift(1, 1, 1, 1)
        self.assertEqual(self.time, (1, 1, 1, 1))

    def test_descriptor_from_class(self):
        # Component descriptors must raise when accessed on the class itself.
        self.assertRaises(AttributeError, lambda: SubRipTime.hours)
コード例 #11
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
class TestSimpleTime(unittest.TestCase):
    """Exercises component access on a zero-initialised SubRipTime."""

    def setUp(self):
        self.time = SubRipTime()

    def test_default_value(self):
        self.assertEqual(self.time.ordinal, 0)

    def test_micro_seconds(self):
        t = self.time
        t.milliseconds = 1
        self.assertEqual(t.milliseconds, 1)
        t.hours += 42
        self.assertEqual(t.milliseconds, 1)
        # Overflow carries into seconds.
        t.milliseconds += 1000
        self.assertEqual(t.seconds, 1)

    def test_seconds(self):
        t = self.time
        t.seconds = 1
        self.assertEqual(t.seconds, 1)
        t.hours += 42
        self.assertEqual(t.seconds, 1)
        # Overflow carries into minutes.
        t.seconds += 60
        self.assertEqual(t.minutes, 1)

    def test_minutes(self):
        t = self.time
        t.minutes = 1
        self.assertEqual(t.minutes, 1)
        t.hours += 42
        self.assertEqual(t.minutes, 1)
        # Overflow carries into hours.
        t.minutes += 60
        self.assertEqual(t.hours, 43)

    def test_hours(self):
        t = self.time
        t.hours = 1
        self.assertEqual(t.hours, 1)
        t.minutes += 42
        self.assertEqual(t.hours, 1)

    def test_shifting(self):
        self.time.shift(1, 1, 1, 1)
        self.assertEqual(self.time, (1, 1, 1, 1))

    def test_descriptor_from_class(self):
        # Component descriptors must raise when accessed on the class itself.
        self.assertRaises(AttributeError, lambda: SubRipTime.hours)
コード例 #12
0
    def generate_word_contexts(self, length):
        """Build token co-occurrence counts within a time window of `length` seconds.

        For every frame, the context of its tokens is its own tokens plus
        the tokens of all frames whose end/start falls within `length`
        seconds before/after it.  Populates and returns
        ``self.word_contexts`` (token -> {context_token: count}) and
        records each context size in ``self.len_windows``.
        """
        self.word_contexts = {}
        self.len_windows = []
        delta = SubRipTime.from_ordinal(int(length) *
                                        1000)  # (ordinal is milliseconds)
        # Lazily tokenise the frames on first use.
        if not self.all_frames:
            self.full_tokens()

        for i, f in enumerate(self.all_frames):
            # Get data from frame
            f_start = f["start"]
            f_end = f["end"]
            f_tokens = f["tokens"]
            start_of_window = f_start - delta
            end_of_window = f_end + delta

            if not f_tokens:  # The frame has no tokens
                continue

            f_context = f["tokens"].copy()  # Initialization of the context

            # Add tokens of preceding frames
            j = -1
            while (i + j
                   ) >= 0 and self.all_frames[i + j]["end"] >= start_of_window:
                f_context.extend(self.all_frames[i + j]["tokens"])
                j -= 1

            # Add tokens of later frames
            j = 1
            while (i + j) <= (len(self.all_frames) - 1) and self.all_frames[
                    i + j]["start"] <= end_of_window:
                f_context.extend(self.all_frames[i + j]["tokens"])
                j += 1

            # Add to context dictionary
            for t in f["tokens"]:
                self.len_windows.append(
                    len(f_context) -
                    1)  # This is for the length of windows analysis
                if t not in self.word_contexts:
                    self.word_contexts[t] = {}
                for c in f_context:
                    self.word_contexts[t][c] = self.word_contexts[t].get(c,
                                                                         0) + 1
                # Each token co-occurs with itself once via f_context;
                # remove that self-count and drop the key when it hits 0.
                self.word_contexts[t][t] -= 1
                if self.word_contexts[t][t] == 0:
                    del self.word_contexts[t][t]

        if not self.check_correct_start_end(
        ):  # If end and star are not correct, the matrices are not symmmetric
            self.correct_symmetry()

        return (self.word_contexts)
コード例 #13
0
 def __init__(self, index: int, media_path: MediaPath,
              pds: PaletteDefinitionSegment, ods: ObjectDefinitionSegment, wds: WindowDefinitionSegment):
     """Build one PGS subtitle item from its palette/object/window segments."""
     self.index = index
     self.media_path = media_path
     self.pds = pds
     self.ods = ods
     self.wds = wds
     # Start time comes from the object segment's presentation timestamp;
     # the end time is unknown at construction and filled in later.
     self.start = SubRipTime.from_ordinal(ods.presentation_timestamp)
     self.end: Optional[SubRipTime] = None
     # Decoded bitmap; OCR text and placement are assigned later.
     self.image = PgsImage(ods.img_data, pds.palettes)
     self.text: Optional[str] = None
     self.place: Optional[Tuple[int, int, int, int]] = None
コード例 #14
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_from_tuple(self):
     """Tuples of (h, m, s, ms) compare equal to SubRipTime component-wise."""
     cases = [
         ((0, 0, 0, 0), SubRipTime()),
         ((0, 0, 0, 1), SubRipTime(milliseconds=1)),
         ((0, 0, 2, 0), SubRipTime(seconds=2)),
         ((0, 3, 0, 0), SubRipTime(minutes=3)),
         ((4, 0, 0, 0), SubRipTime(hours=4)),
         ((1, 2, 3, 4), SubRipTime(1, 2, 3, 4)),
     ]
     for expected, value in cases:
         self.assertEqual(expected, value)
コード例 #15
0
ファイル: subtitle.py プロジェクト: cash2one/AutoSystem
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """Merge two SRT subtitle tracks (e.g. two languages) into one.

    Because the two files have different timelines, the merge emits a new
    cue at every boundary of either file, so the bilingual lines do not
    always change simultaneously; this is unavoidable.

    See https://github.com/byroot/pysrt/issues/17
    and https://github.com/byroot/pysrt/issues/15

    :param sub_a: subtitles from ``SubRipFile.open(sub_a_path, encoding=encoding)``
    :param sub_b: second subtitle track
    :param delta: minimum interval length (SubRipTime) worth emitting
    :param encoding: unused here; kept for backward compatibility
    :return: merged SubRipFile
    """
    out = SubRipFile()
    # Every start/end instant of either track is a segment boundary.
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    j = k = 0
    # `range` replaces the Python-2-only `xrange` (identical iteration).
    for i in range(1, len(intervals)):
        start = SubRipTime.from_ordinal(intervals[i - 1])
        end = SubRipTime.from_ordinal(intervals[i])

        if (end - start) > delta:
            # Boundaries are sorted, so each scan resumes at j/k instead
            # of rescanning from the beginning.
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                item = SubRipItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
コード例 #16
0
ファイル: subtitle.py プロジェクト: GoTop/AutoSystem
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """Merge two SRT subtitle tracks (e.g. two languages) into one.

    Because the two files have different timelines, the merge emits a new
    cue at every boundary of either file, so the bilingual lines do not
    always change simultaneously; this is unavoidable.

    See https://github.com/byroot/pysrt/issues/17
    and https://github.com/byroot/pysrt/issues/15

    :param sub_a: subtitles from ``SubRipFile.open(sub_a_path, encoding=encoding)``
    :param sub_b: second subtitle track
    :param delta: minimum interval length (SubRipTime) worth emitting
    :param encoding: unused here; kept for backward compatibility
    :return: merged SubRipFile
    """
    out = SubRipFile()
    # Every start/end instant of either track is a segment boundary.
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    j = k = 0
    # `range` replaces the Python-2-only `xrange` (identical iteration).
    for i in range(1, len(intervals)):
        start = SubRipTime.from_ordinal(intervals[i - 1])
        end = SubRipTime.from_ordinal(intervals[i])

        if (end - start) > delta:
            # Boundaries are sorted, so each scan resumes at j/k instead
            # of rescanning from the beginning.
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                item = SubRipItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
コード例 #17
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_from_dict(self):
     """Dicts of components compare equal to the matching SubRipTime."""
     cases = [
         (dict(), SubRipTime()),
         (dict(milliseconds=1), SubRipTime(milliseconds=1)),
         (dict(seconds=2), SubRipTime(seconds=2)),
         (dict(minutes=3), SubRipTime(minutes=3)),
         (dict(hours=4), SubRipTime(hours=4)),
         (dict(hours=1, minutes=2, seconds=3, milliseconds=4),
          SubRipTime(1, 2, 3, 4)),
     ]
     for expected, value in cases:
         self.assertEqual(expected, value)
コード例 #18
0
    def generate_srt(self, text: str):
        """
        Generates .srt file with the given text and timestamps.
        :param text: String with all retrieved text.
        """
        self.create_subs_path()

        subs = open_srt(self.srt_path)
        texts = self.prepare_text(text.split(" "))
        timestamps = self.prepare_timestamps(texts)

        for i, (sentence, (begin, finish)) in enumerate(zip(texts, timestamps)):
            # Timestamps are ':'-separated integer fields, milliseconds last.
            begin_parts = [int(piece) for piece in begin.split(':')]
            finish_parts = [int(piece) for piece in finish.split(':')]

            sub = SubRipItem(index=i)
            sub.text = sentence
            sub.start = SubRipTime(hours=begin_parts[0],
                                   minutes=begin_parts[1],
                                   seconds=begin_parts[2],
                                   milliseconds=begin_parts[3])
            sub.end = SubRipTime(hours=finish_parts[0],
                                 minutes=finish_parts[1],
                                 seconds=finish_parts[2],
                                 milliseconds=finish_parts[3])
            subs.append(sub)

        # Saving result subtitles into file
        subs.save(self.srt_path, encoding='utf-8')

        logging.info(f"Generated subtitles are saved in {self.srt_path}")
コード例 #19
0
ファイル: mvyskoc_merge.py プロジェクト: EdwardBetts/pysrt
def merge_subtitle(sub_a, sub_b, delta):
    """Interleave two subtitle tracks into a single SubRipFile.

    Cuts the combined timeline at every start/end instant of either track
    and emits one cue per interval longer than `delta`, containing the
    lines from both tracks that are active in that interval.

    :param sub_a: first subtitle track
    :param sub_b: second subtitle track
    :param delta: minimum interval length (SubRipTime) worth emitting
    :return: merged SubRipFile with renumbered indexes
    """
    out = SubRipFile()
    # Every start/end instant of either track is a segment boundary.
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    j = k = 0
    # `range` replaces the Python-2-only `xrange` (identical iteration).
    for i in range(1, len(intervals)):
        start = SubRipTime.from_ordinal(intervals[i-1])
        end = SubRipTime.from_ordinal(intervals[i])

        if (end-start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                item = SubRipItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
コード例 #20
0
def merge_subtitle(sub_a, sub_b, delta):
    """Interleave two subtitle tracks into a single SubRipFile.

    Cuts the combined timeline at every start/end instant of either
    track and emits one cue per interval longer than `delta`.
    """
    merged = SubRipFile()
    # Every start/end instant of either track is a segment boundary.
    boundaries = sorted(
        [item.start.ordinal for item in sub_a]
        + [item.end.ordinal for item in sub_a]
        + [item.start.ordinal for item in sub_b]
        + [item.end.ordinal for item in sub_b]
    )

    idx_a = idx_b = 0
    for prev, curr in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(prev)
        end = SubRipTime.from_ordinal(curr)

        if (end - start) > delta:
            text_a, idx_a = find_subtitle(sub_a, start, end, idx_a)
            text_b, idx_b = find_subtitle(sub_b, start, end, idx_b)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
コード例 #21
0
def get_captions(client_name, clip_id):
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % ( client_name, clip_id)
    print "Fetching URL: %s" % g_url
    response, j = h.request(g_url)
    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()

    if response.get('status') == '200':
        captions = []
        try:
            j = json.loads(j, strict=False)[0]
        except ValueError:
            ts = re.sub('([{,]\s+)([a-z]+)(: ")', lambda s: '%s"%s"%s' % (s.groups()[0], s.groups()[1], s.groups()[2]), j).replace("\\", "")
            try:
                j = json.loads(ts, strict=False)[0]
            except UnicodeDecodeError:
                ts = unicode(ts, errors='ignore')
                j = json.loads(ts, strict=False)[0]
        except:
            j = False

        sub_count = 0
        for item in j: 
            if item["type"] == "text":
                cap = item["text"]
                offset = round(float(item["time"]), 3)
                captions.append({'time': offset, 'text': cap})        
                end = get_cap_end(j, sub_count)
                if end:
                    subtitle = SubRipItem(index=sub_count, start=SubRipTime(seconds=offset), end=SubRipTime(seconds=end), text=cap)
                    subs.append(subtitle)
           
            sub_count = sub_count + 1
        
        try:
            subs.save(path=filename, encoding="utf-8")
        except IOError:
            p = subprocess.Popen('mkdir -p %s' % dirname, shell=True, stdout=subprocess.PIPE)
            t = p.wait()

            subs.save(path=filename, encoding="utf-8")
            
        s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
        return (captions, s3_url)
    else:
        return ([], '')
コード例 #22
0
    def decode(cls, data: bytes, media_path: MediaPath):
        """Decode raw PGS data into a list of validated PgsSubtitleItem objects.

        Display sets without an image only close the previous cue: their
        last window segment's presentation timestamp becomes that cue's
        end time.
        """
        display_sets = PgsReader.decode(data, media_path)
        index = 0
        items = []
        for display_set in display_sets:
            if items and not display_set.has_image and display_set.wds:
                # Image-less set: it marks where the previous cue ends.
                items[-1].end = SubRipTime.from_ordinal(display_set.wds[-1].presentation_timestamp)
                continue

            for (pds, ods, wds) in zip(display_set.pds, display_set.ods, display_set.wds):
                item = PgsSubtitleItem(index, media_path, pds, ods, wds)
                # If the previous cue is still open and starts within
                # 10000 ordinal units (presumably ms, i.e. 10 s — confirm)
                # of this one, close it just before this cue starts.
                if items and items[-1].end is None and items[-1].start + 10000 >= item.start:
                    items[-1].end = max(items[-1].start, item.start - 1)
                items.append(item)
                index += 1

        for item in items:
            item.validate()

        return items
コード例 #23
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def setUp(self):
     # Fixture time: 01:02:03,004.
     self.time = SubRipTime(1, 2, 3, 4)
コード例 #24
0
ファイル: test_srttime.py プロジェクト: GbalsaC/bitnamiP
 def test_from_ordinal(self):
     """An ordinal of 3,600,000 ms equals exactly one hour."""
     # `assertEqual` replaces the deprecated `assertEquals` alias
     # (removed in Python 3.12).
     self.assertEqual(SubRipTime.from_ordinal(3600000), {'hours': 1})
     self.assertEqual(SubRipTime(1), 3600000)
コード例 #25
0
# IRC bot that records Twitch chat and mirrors each message batch into an
# SRT subtitle track, so chat can later be replayed in sync with a recording.
bot = irc_bot.irc_bot(username,
                      oauth,
                      chat_channel,
                      chat_server[0],
                      chat_server[1],
                      twitchclient_version=twitchclient_version)

outsrt = SubRipFile()

text = ''

while 1:
    raw_msg_list = bot.get_message()
    if len(raw_msg_list) > 0:
        if len(text) > 0:
            # Close the caption opened by the previous batch: it spans
            # from the previous batch's arrival time until now.
            end = SubRipTime.from_time(datetime.now())
            item = SubRipItem(0, start, end, text)
            outsrt.append(item)
        start = SubRipTime.from_time(datetime.now())
        text = ''
        timestamp = get_timestamp(timestamp_format)
        for item in raw_msg_list:
            if record_raw:
                log_add(raw_log_path, timestamp + ' ' + item + '\n')
            username, message = irc_bot.parse_user(item)
            if username != '':
                safe_print(chat_channel + " " + username + ": " + message)
                log_add(log_path,
                        timestamp + ' ' + username + ': ' + message + '\n')
                text += username + ": " + message + '\n'
                # Keep SRT item numbering sequential.
                outsrt.clean_indexes()
コード例 #26
0
# Build an SRT file from PGS display sets: image sets open a cue, the
# following image-less set closes it.
srt = SubRipFile()

# get all DisplaySets that contain an image
print("Loading DisplaySets...")
allsets = [ds for ds in tqdm(pgs.iter_displaysets())]

print(f"Running OCR on {len(allsets)} DisplaySets and building SRT file...")
subText = ""
subStart = 0
subIndex = 0
for ds in tqdm(allsets):
    if ds.has_image:
        # get Palette Display Segment
        pds = ds.pds[0]
        # get Object Display Segment
        ods = ds.ods[0]

        # img = make_image(ods, pds)
        # subText = pytesseract.image_to_string(img)
        subStart = ods.presentation_timestamp
    else:
        startTime = SubRipTime(milliseconds=int(subStart))
        endTime = SubRipTime(
            milliseconds=int(ds.end[0].presentation_timestamp))
        # BUG FIX: the cue text was the literal string "subText"; use the
        # OCR result variable instead (empty until the OCR lines above are
        # re-enabled).
        srt.append(SubRipItem(subIndex, startTime, endTime, subText))
        subIndex += 1

print(f"Done. SRT file saved as {srtFile}")
srt.save(srtFile, encoding='utf-8')
コード例 #27
0
ファイル: subtitle.py プロジェクト: cash2one/AutoSystem
def merge_video_subtitle(video_id):
    """
    Convert the video's Chinese and English vtt subtitles to srt, then
    merge them into a single bilingual srt subtitle file.
    :param video_id: primary key of the Video row
    :return: path of the merged srt file, or False when either subtitle is missing
    """
    video = Video.objects.get(pk=video_id)

    # Settings default values
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    # NOTE(review): `&` works here because both operands are bools, but
    # the conventional operator would be `and`.
    if (video.subtitle_cn != '') & (video.subtitle_en != ''):
        # convert_file(input_captions = video.subtitle_cn, output_writer)

        # vtt-format subtitles
        # subs_cn_vtt = SubRipFile.open(video.subtitle_cn.path,
        # encoding=encoding)
        # subs_en_vtt = SubRipFile.open(video.subtitle_en.path,
        # encoding=encoding)

        # Convert the vtt subtitles to srt
        subs_cn_srt_filename = '%s-%s.cn.srt' % (get_valid_filename(
            video.title), video.video_id)
        subs_cn_srt_path = os.path.join(YOUTUBE_DOWNLOAD_DIR,
                                        subs_cn_srt_filename)

        # This feature no longer works:
        # subs_cn_srt_result = convert_file(
        # input_captions=video.subtitle_cn.path,output_writer=subs_cn_srt)

        subs_cn_srt_result = convert_subtilte_format(
            srt_file=video.subtitle_cn.path, ass_file=subs_cn_srt_path)

        subs_en_srt_filename = '%s-%s.en.srt' % (get_valid_filename(
            video.title), video.video_id)
        subs_en_srt_path = os.path.join(YOUTUBE_DOWNLOAD_DIR,
                                        subs_en_srt_filename)
        # subs_en_srt_result = convert_file(
        # input_captions=video.subtitle_en.path,output_writer = subs_en_srt)
        # NOTE(review): unlike the cn branch above, this overwrites
        # `subs_en_srt_path` with the function's return value — confirm
        # convert_subtilte_format returns the output path.
        subs_en_srt_path = convert_subtilte_format(
            srt_file=video.subtitle_en.path, ass_file=subs_en_srt_path)

        subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
        subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
        merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

        # Some YouTube titles contain non-ASCII characters, or characters
        # such as '/' that cannot appear in file names, so Django's
        # get_valid_filename() is used to sanitise them.
        # Note: this differs from youtube-dl's `restrictfilenames` output,
        # so merge_subs_filename may not match the names recorded in
        # subtitle_cn / subtitle_en.  Dots in the title are kept.
        merge_subs_filename = '%s-%s.zh-Hans.en.srt' % (get_valid_filename(
            video.title), video.video_id)

        merge_subs_path = os.path.join(YOUTUBE_DOWNLOAD_DIR,
                                       merge_subs_filename)

        merge_subs.save(merge_subs_path, encoding=encoding)

        video.subtitle_merge = merge_subs_path
        video.save(update_fields=['subtitle_merge'])
        return merge_subs_path
    else:
        return False
コード例 #28
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_parsing(self):
     """Each known string form parses to the matching SubRipTime."""
     for expected, components in self.KNOWN_VALUES:
         self.assertEqual(expected, SubRipTime(*components))
コード例 #29
0
ファイル: sublog_to_srt.py プロジェクト: WNRI/errantia
# Convert a timestamped chat/sub log into SRT, written to stdout.
srt = SubRipFile(eol='\n', encoding='utf-8')
i = 1

for line in sublog:
    line = line.split(",", 1)
    if (line[0] and line[0][0] == '-'):
        # Directive lines start with '-'; only '- start <time>' matters,
        # and only the first one is honoured.
        # BUG FIX (idiom): `is None` replaces the `== None` comparison.
        if (START_TIME is None and line[0][:8] == '- start '):
            START_TIME = datetime.strptime(line[0], '- start ' + TIMEFORMAT +
            '\n')
        continue

    no = datetime.strptime(line[0], TIMEFORMAT) - START_TIME
    if (abs(no) > timedelta(1)):
        print("\nCan't go over a day in a subtitle! Delete non-used lines in" + \
                " log.\nLet there only be one '- start' line at the top of" + \
                " the log-file.")
        sys.exit(1)

    # Ordinal is in milliseconds.
    # NOTE(review): `no.seconds` ignores `no.days`; a negative delta
    # (line timestamped before START_TIME) would produce a large wrong
    # ordinal — confirm log lines are always after the start marker.
    time = SubRipTime.from_ordinal(no.seconds*1000 + no.microseconds*0.001)

    # Each subtitle is displayed for 30 seconds (30*1000 ms ordinal units).
    item = SubRipItem(i, start=time, end=time + 30*1000,
            text=unicode(line[1], 'utf-8'))
    srt.append(item)
    i += 1

srt.clean_indexes()
#srt.save(path=sys.stdout)

for line in srt:
    sys.stdout.write(unicode(line).encode('utf-8'))
コード例 #30
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_from_ordinal(self):
     """from_ordinal treats its argument as milliseconds."""
     one_hour_ms = 3600000
     self.assertEqual(SubRipTime.from_ordinal(one_hour_ms), {'hours': 1})
     self.assertEqual(SubRipTime(1), one_hour_ms)
コード例 #31
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_serialization(self):
     """str() of a SubRipTime reproduces the known string form."""
     for expected, components in self.KNOWN_VALUES:
         self.assertEqual(expected, str(SubRipTime(*components)))
コード例 #32
0
ファイル: test_srttime.py プロジェクト: GbalsaC/bitnamiP
 def setUp(self):
     # Fresh zero time (00:00:00,000) for every test.
     self.time = SubRipTime()
コード例 #33
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_mul(self):
     """Multiplication scales the time; results compare equal to tuples too."""
     doubled = SubRipTime(2, 4, 6, 8)
     self.assertEqual(self.time * 2, doubled)
     self.assertEqual(self.time * 0.5, (0, 31, 1, 502))
コード例 #34
0
# Twitch chat recorder: appends chat to plain-text logs and mirrors each
# message batch into an SRT subtitle track, saved after every chat line.
raw_log_path = current_directory + '/comment_log_raw/' + chat_channel + '.txt'
log_path = current_directory + '/comment_log/' + chat_channel + '.txt'

srt_log_path = current_directory + '/comment_log/' + chat_channel + '.srt'

bot = irc_bot.irc_bot(username, oauth, chat_channel, chat_server[0], chat_server[1], twitchclient_version = twitchclient_version)

outsrt = SubRipFile()

text = ''

while 1:
	raw_msg_list = bot.get_message()
	if len(raw_msg_list) > 0:
		if len(text) > 0:
			# Close the caption opened by the previous batch: it spans
			# from that batch's arrival time until now.
			end = SubRipTime.from_time(datetime.now())
			item = SubRipItem(0, start, end, text)
			outsrt.append(item)
		start = SubRipTime.from_time(datetime.now())
		text = ''
		timestamp = get_timestamp(timestamp_format)
		for item in raw_msg_list:
			if record_raw:
				log_add(raw_log_path, timestamp + ' ' + item + '\n')
			username, message = irc_bot.parse_user(item)
			if username != '':
				safe_print(chat_channel + " " + username + ": " + message)
				log_add(log_path, timestamp + ' ' + username + ': ' + message + '\n')
				text += username + ": " + message + '\n'
				# Renumber and persist the SRT after every chat line.
				outsrt.clean_indexes()
				outsrt.save(srt_log_path, encoding='utf-8')
コード例 #35
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def setUp(self):
     # Zero-initialised time fixture shared by the tests below.
     self.time = SubRipTime()
コード例 #36
0
import sys
import os

from pysrt import SubRipFile  # https://github.com/byroot/pysrt
from pysrt import SubRipItem
from pysrt import SubRipTime

from textAnalyse import analyzeSubLevel

from fixEncoding import makeFileUtf8Bom
from syncSrts import syncSrts

# Merge tolerance used when aligning the two subtitle tracks.
delta = SubRipTime(milliseconds=500)
encoding = "utf_8"

# Module self-reference (the `sys.modules[__name__]` idiom) so the
# templates below are explicit module-level mutable settings.
this = sys.modules[__name__]

# Format templates applied to level-1 / level-2 subtitle lines.
this.L1_sub_template = "{}"
this.L2_sub_template = "{}"

level_criterias = {
    '1': {
        'max_CEFR_level':
        'A1',  # lines with CEFR level > this will not be hidden
        'max_flesh_kincade_grade':
        4,  # lines with fk grade > this will not be hidden
        'max_characters':
        30,  # lines with more characters than this will never be hidden
        'max_words': 8,  # lines with more words than this will never be hidden
    },
    '2': {
コード例 #37
0
ファイル: test_srttime.py プロジェクト: zhiweizhong/pysrt
 def test_negative_serialization(self):
     """Negative component values serialise as the zero time."""
     negative = SubRipTime(-1, 2, 3, 4)
     self.assertEqual('00:00:00,000', str(negative))