예제 #1
0
    def generate_srt_from_sjson(sjson_subs):
        """
        Render an `sjson` transcript as SubRip (*.srt) text.

        Arguments:
            sjson_subs (dict): transcript holding parallel 'start', 'end' and
                'text' sequences; the start/end values are handed to
                SubRipTime as milliseconds.

        Returns:
            str: one serialized SubRipItem per cue (indexed from 0), each
            followed by a newline. An empty string is returned when the three
            sequences do not have the same length.
        """
        if not (len(sjson_subs['start']) == len(sjson_subs['end']) == len(sjson_subs['text'])):
            return ''

        cues = zip(sjson_subs['start'], sjson_subs['end'], sjson_subs['text'])
        rendered = []
        for position, (begin_ms, finish_ms, caption) in enumerate(cues):
            entry = SubRipItem(
                index=position,
                start=SubRipTime(milliseconds=begin_ms),
                end=SubRipTime(milliseconds=finish_ms),
                text=caption
            )
            rendered.append(str(entry))
            rendered.append('\n')
        return ''.join(rendered)
예제 #2
0
def generate_srt_from_sjson(sjson_subs, speed):
    """Generate transcripts with speed = 1.0 from sjson to SubRip (*.srt).

    The subs are first passed through ``generate_subs(speed, 1, sjson_subs)``
    and the result is serialized one SubRipItem per cue (indexed from 0),
    each followed by a newline.

    :param sjson_subs: "sjson" subs with 'start', 'end' and 'text' entries.
    :param speed: speed of `sjson_subs`.
    :returns: "srt" subs; an empty string when the 'start', 'end' and 'text'
        entries of `sjson_subs` differ in length.
    """
    lengths_match = (
        len(sjson_subs['start'])
        == len(sjson_subs['end'])
        == len(sjson_subs['text'])
    )
    if not lengths_match:
        return ''

    normalized = generate_subs(speed, 1, sjson_subs)

    pieces = []
    for position, begin_ms in enumerate(normalized['start']):
        entry = SubRipItem(
            index=position,
            start=SubRipTime(milliseconds=begin_ms),
            end=SubRipTime(milliseconds=normalized['end'][position]),
            text=normalized['text'][position])
        pieces.append(unicode(entry))
        pieces.append('\n')
    return ''.join(pieces)
예제 #3
0
def tick():
    """Trigger the light event for the subtitle at the current playback time.

    Reads the player's time once, asks ``find_subtitle`` for a subtitle
    between that time and one ``TICK_TIME`` later (passing ``last_played``
    as ``lo``) and, when a subtitle is returned, hands it to
    ``trigger_light`` and stores its index in the global ``last_played``.
    """
    global last_played

    # Read the player clock a single time so that both window bounds (and the
    # debug output) describe the same instant; separate reads can drift apart.
    ptms = player.get_time() / 1000.0
    pt = SubRipTime(seconds=ptms)
    ptd = SubRipTime(seconds=ptms + 1 * TICK_TIME)

    if DEBUG:
        # Wall-clock based values are only needed for the diagnostic line.
        t = perf_counter()
        ts = SubRipTime(seconds=t)
        tsd = SubRipTime(seconds=t + 1 * TICK_TIME)
        pp = player.get_position()
        print('Time: %s | %s | %s - %s | %s - %s | %s | %s' % (datetime.now(),t,ts,tsd,pt,ptd,pp,ptms))
        print('Finding subtitle starting at %s and ending at %s' % (pt, ptd))

    sub, i = find_subtitle(subs, pt, ptd, lo=last_played)

    if DEBUG:
        print('Result of find_subtitle: ', i)

    # find_subtitle signals "nothing found" with an empty string.
    if sub != "":
        print("Light event:", i, sub)
        trigger_light(sub)
        last_played = i
예제 #4
0
    def triggerPreviousEvent(self, pos):
        """Look up and trigger the lighting event for a playback position.

        Builds a time window from ``pos`` to ``pos + TICK_TIME`` (both passed
        to SubRipTime as seconds), calls ``self.find_subtitle`` on
        ``self.subs`` with ``backwards=True`` and, when a subtitle is
        returned, passes it to ``self.trigger_light`` and records its index
        in ``self.last_played``.

        :param pos: playback position, used as seconds.
        """
        if LIGHTING_MSGS:
            print("Finding last lighting command from pos: ", pos)

        pt = SubRipTime(seconds=pos)
        ptd = SubRipTime(seconds=(pos + 1 * TICK_TIME))

        if VERBOSE and DEBUG:
            print("Finding last light event, starting from: ")
            # Fixed: the "pt" line previously printed ptd, hiding the real
            # window start from the debug output.
            print("pt: ", pt)
            print("ptd: ", ptd)

        sub, i = self.find_subtitle(self.subs, pt, ptd, backwards=True)

        if LIGHTING_MSGS:
            print("Seeking, found sub:", sub, " at pos: ", i)

        # find_subtitle signals "nothing found" with an empty string.
        if sub != "":
            if LIGHTING_MSGS and DEBUG:
                print(i, "Found last lighting event!:", sub)
            self.trigger_light(sub)
            self.last_played = i
            if DEBUG:
                print('last_played: ', i)
예제 #5
0
def tick():
    """Trigger the Hue light event for the subtitle at the current playback time.

    Reads the player's time once, prints a timing line, asks
    ``find_subtitle`` for a subtitle between that time and one ``TICK_TIME``
    later and, when a subtitle with an index greater than ``last_played`` is
    returned, hands it to ``trigger_light_hue`` and stores its index in the
    global ``last_played``.
    """
    global last_played

    t = perf_counter()
    ts = SubRipTime(seconds=t)
    tsd = SubRipTime(seconds=t + 1 * TICK_TIME)

    pp = player.get_position()
    # Read the player clock a single time so that both window bounds and the
    # printed value describe the same instant; separate reads can drift apart.
    ptms = player.get_time() / 1000.0
    pt = SubRipTime(seconds=ptms)
    ptd = SubRipTime(seconds=ptms + 1 * TICK_TIME)
    print('Time: %s | %s | %s - %s | %s - %s | %s | %s' % (datetime.now(),t,ts,tsd,pt,ptd,pp,ptms))

    sub, i = find_subtitle(subs, pt, ptd)
    print("Subtitle:", sub, i)

    # Only react to a found subtitle that comes after the last one handled,
    # so the same event is not triggered twice.
    if sub != "" and i > last_played:
        trigger_light_hue(sub)
        last_played = i
예제 #6
0
def merge_srt(chn_file, eng_file, output_file):
    """Merge two subtitle files and write the result as UTF-8.

    Both inputs are opened with ``SubRipFile.open`` and combined by
    ``merge_subtitle`` using a 500 ms delta. An existing file at
    ``output_file`` is removed before the merged subtitles are saved there.

    :param chn_file: path of the first subtitle file (presumably Chinese,
        going by the name).
    :param eng_file: path of the second subtitle file (presumably English).
    :param output_file: path the merged subtitles are written to.
    """
    first_subs = SubRipFile.open(chn_file)
    second_subs = SubRipFile.open(eng_file)
    merged = merge_subtitle(first_subs, second_subs,
                            SubRipTime(milliseconds=500))

    # Start from a clean slate if a previous result is still on disk.
    if os.path.isfile(output_file):
        os.remove(output_file)
    merged.save(output_file, encoding='utf8')
 def offset(self):
     """Return ``self.media.offset`` converted to a SubRipTime.

     The offset's ``seconds`` are split into hours, minutes and seconds and
     its ``microseconds`` are divided by 1000 to obtain milliseconds.
     NOTE(review): only ``seconds`` and ``microseconds`` are read; if the
     offset is a ``timedelta``, any ``days`` part is not included - confirm.
     """
     delta = self.media.offset
     whole_hours, leftover = divmod(delta.seconds, 3600)
     whole_minutes, whole_seconds = divmod(leftover, 60)
     millis = delta.microseconds / 1000
     return SubRipTime(hours=whole_hours,
                       minutes=whole_minutes,
                       seconds=whole_seconds,
                       milliseconds=millis)
예제 #8
0
 def test_from_time(self):
     """SubRipTime, and its to_time() result, compare correctly with a time.

     The reference is ``time(1, 2, 3, 4000)``; the assertions treat it as
     equal to ``SubRipTime(1, 2, 3, 4)``.
     """
     reference = time(1, 2, 3, 4000)
     # First pass compares SubRipTime objects directly, the second pass
     # compares what to_time() returns.
     for convert in (lambda srt_time: srt_time,
                     lambda srt_time: srt_time.to_time()):
         self.assertEqual(convert(SubRipTime(1, 2, 3, 4)), reference)
         self.assertTrue(convert(SubRipTime(1, 2, 3, 5)) >= reference)
         self.assertTrue(convert(SubRipTime(1, 2, 3, 3)) <= reference)
         self.assertTrue(convert(SubRipTime(1, 2, 3, 0)) != reference)
예제 #9
0
 def test_from_tuple(self):
     """A 4-tuple compares equal to the SubRipTime with the same fields."""
     cases = (
         ((0, 0, 0, 0), SubRipTime()),
         ((0, 0, 0, 1), SubRipTime(milliseconds=1)),
         ((0, 0, 2, 0), SubRipTime(seconds=2)),
         ((0, 3, 0, 0), SubRipTime(minutes=3)),
         ((4, 0, 0, 0), SubRipTime(hours=4)),
         ((1, 2, 3, 4), SubRipTime(1, 2, 3, 4)),
     )
     for as_tuple, srt_time in cases:
         self.assertEqual(as_tuple, srt_time)
예제 #10
0
 def test_from_dict(self):
     """A dict of time fields compares equal to the matching SubRipTime."""
     cases = (
         ({}, SubRipTime()),
         ({'milliseconds': 1}, SubRipTime(milliseconds=1)),
         ({'seconds': 2}, SubRipTime(seconds=2)),
         ({'minutes': 3}, SubRipTime(minutes=3)),
         ({'hours': 4}, SubRipTime(hours=4)),
         ({'hours': 1, 'minutes': 2, 'seconds': 3, 'milliseconds': 4},
          SubRipTime(1, 2, 3, 4)),
     )
     for as_dict, srt_time in cases:
         self.assertEqual(as_dict, srt_time)
예제 #11
0
    def generate_srt(self, text: str):
        """
        Builds subtitle entries from the given text and saves the .srt file.

        The text is split on single spaces and handed to `prepare_text`; the
        resulting sentences are paired with the (start, end) timestamp
        strings returned by `prepare_timestamps`. Each timestamp string is
        read as colon-separated integers: hours, minutes, seconds and
        milliseconds.
        :param text: String with all retrieved text.
        """
        self.create_subs_path()

        subtitle_file = open_srt(self.srt_path)
        sentences = self.prepare_text(text.split(" "))
        time_ranges = self.prepare_timestamps(sentences)

        def as_fields(stamp):
            # "H:M:S:ms" -> [H, M, S, ms] as integers
            return [int(field) for field in stamp.split(':')]

        for position, (sentence, (begin, finish)) in enumerate(
                zip(sentences, time_ranges)):
            begin_fields = as_fields(begin)
            finish_fields = as_fields(finish)

            entry = SubRipItem(index=position)
            entry.text = sentence
            entry.start = SubRipTime(hours=begin_fields[0],
                                     minutes=begin_fields[1],
                                     seconds=begin_fields[2],
                                     milliseconds=begin_fields[3])
            entry.end = SubRipTime(hours=finish_fields[0],
                                   minutes=finish_fields[1],
                                   seconds=finish_fields[2],
                                   milliseconds=finish_fields[3])

            subtitle_file.append(entry)

        # Persist the collected entries back to the same path
        subtitle_file.save(self.srt_path, encoding='utf-8')

        logging.info(f"Generated subtitles are saved in {self.srt_path}")
예제 #12
0
def get_captions(client_name, clip_id):
    """Fetch a clip's captions, save them as an .srt file and upload it.

    Requests ``http://<client_name>/JSON.php?clip_id=<clip_id>``, builds one
    subtitle for every ``"text"`` item for which ``get_cap_end`` returns an
    end time, writes ``data/granicus/srt/<client_name>/<clip_id>.srt`` below
    the current working directory and passes that file to ``push_to_s3``.

    :param client_name: host name used in the request URL and output paths.
    :param clip_id: clip identifier used in the request URL and file name.
    :returns: ``(captions, s3_url)`` where ``captions`` is a list of
        ``{'time': ..., 'text': ...}`` dicts, or ``([], '')`` when the
        response status is not ``'200'``.
    """
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % (client_name, clip_id)
    print("Fetching URL: %s" % g_url)
    response, j = h.request(g_url)
    if response.get('status') != '200':
        return ([], '')

    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()
    captions = []

    try:
        j = json.loads(j, strict=False)[0]
    except ValueError:
        # The payload is not strict JSON: wrap bare lower-case keys in
        # double quotes, drop backslashes and try again.
        ts = re.sub(r'([{,]\s+)([a-z]+)(: ")',
                    lambda s: '%s"%s"%s' % s.groups(), j).replace("\\", "")
        try:
            j = json.loads(ts, strict=False)[0]
        except UnicodeDecodeError:
            ts = unicode(ts, errors='ignore')
            j = json.loads(ts, strict=False)[0]
    except Exception:
        # Any other failure means there are no usable items. The original
        # stored False here, which made the loop below raise TypeError.
        j = []

    # sub_count counts every item (not only "text" ones) because it is also
    # the position handed to get_cap_end.
    for sub_count, item in enumerate(j):
        if item["type"] == "text":
            cap = item["text"]
            offset = round(float(item["time"]), 3)
            captions.append({'time': offset, 'text': cap})
            end = get_cap_end(j, sub_count)
            if end:
                subtitle = SubRipItem(index=sub_count,
                                      start=SubRipTime(seconds=offset),
                                      end=SubRipTime(seconds=end),
                                      text=cap)
                subs.append(subtitle)

    try:
        subs.save(path=filename, encoding="utf-8")
    except IOError:
        # Presumably the target directory is missing: create it without
        # going through a shell (the path contains caller-supplied text),
        # then retry once.
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        subs.save(path=filename, encoding="utf-8")

    s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
    return (captions, s3_url)
예제 #13
0
srt = SubRipFile()

# Load every DisplaySet up front so the total can be reported below.
print("Loading DisplaySets...")
allsets = list(tqdm(pgs.iter_displaysets()))

print(f"Running OCR on {len(allsets)} DisplaySets and building SRT file...")
subText = ""
subStart = 0
subIndex = 0
for ds in tqdm(allsets):
    if ds.has_image:
        # get Palette Display Segment
        pds = ds.pds[0]
        # get Object Display Segment
        ods = ds.ods[0]

        # OCR is currently disabled:
        # img = make_image(ods, pds)
        # subText = pytesseract.image_to_string(img)

        # Remember when this image appears; the next DisplaySet without an
        # image supplies the matching end time.
        subStart = ods.presentation_timestamp
    else:
        startTime = SubRipTime(milliseconds=int(subStart))
        endTime = SubRipTime(
            milliseconds=int(ds.end[0].presentation_timestamp))
        # NOTE(review): the item text is the literal string "subText", not
        # the subText variable - looks like a placeholder while OCR is
        # disabled; confirm before re-enabling OCR.
        srt.append(SubRipItem(subIndex, startTime, endTime, "subText"))
        subIndex += 1

# Save first, so the success message is only printed once the file exists.
srt.save(srtFile, encoding='utf-8')
print(f"Done. SRT file saved as {srtFile}")
예제 #14
0
import sys
import os

from pysrt import SubRipFile  # https://github.com/byroot/pysrt
from pysrt import SubRipItem
from pysrt import SubRipTime

from textAnalyse import analyzeSubLevel

from fixEncoding import makeFileUtf8Bom
from syncSrts import syncSrts

# 500 ms time value. NOTE(review): presumably the tolerance used when
# syncing/merging subtitles - confirm against the code that reads it.
delta = SubRipTime(milliseconds=500)
# Encoding name. NOTE(review): presumably used for subtitle file I/O - confirm.
encoding = "utf_8"

# Reference to this module object, so the templates below are stored as
# module attributes.
this = sys.modules[__name__]

# Format templates, both initially the bare placeholder "{}".
# NOTE(review): L1/L2 presumably stand for the two subtitle languages - confirm.
this.L1_sub_template = "{}"
this.L2_sub_template = "{}"

level_criterias = {
    '1': {
        'max_CEFR_level':
        'A1',  # lines with CEFR level > this will not be hidden
        'max_flesh_kincade_grade':
        4,  # lines with fk grade > this will not be hidden
        'max_characters':
        30,  # lines with more characters than this will never be hidden
        'max_words': 8,  # lines with more words than this will never be hidden
    },
    '2': {
예제 #15
0
def merge_video_subtitle(video_id):
    """
    Convert a video's Chinese and English VTT subtitles to SRT and merge them.

    Both subtitle files of the ``Video`` with primary key ``video_id`` are
    converted into ``YOUTUBE_DOWNLOAD_DIR``, merged with ``merge_subtitle``
    (500 ms delta) and saved as ``<title>-<video_id>.zh-Hans.en.srt``. The
    merged path is stored in ``video.subtitle_merge``.

    :param video_id: primary key of the Video to process.
    :return: path of the merged SRT file, or False when the Chinese or the
        English subtitle is missing.
    """
    video = Video.objects.get(pk=video_id)

    # Both source subtitles are required (logical `and` replaces the
    # original bitwise `&`).
    if not (video.subtitle_cn != '' and video.subtitle_en != ''):
        return False

    # Settings default values
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    # Some YouTube titles contain non-ASCII characters or characters such as
    # "/" that cannot appear in a file name, so run the title through
    # Django's get_valid_filename(). Note: the result differs from the names
    # youtube-dl produces with restrictfilenames, so these names may not
    # match the ones stored in subtitle_cn / subtitle_en. Dots in the title
    # are kept.
    safe_title = get_valid_filename(video.title)

    # Convert the VTT subtitles to SRT.
    subs_cn_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR, '%s-%s.cn.srt' % (safe_title, video.video_id))
    convert_subtilte_format(
        srt_file=video.subtitle_cn.path, ass_file=subs_cn_srt_path)

    subs_en_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR, '%s-%s.en.srt' % (safe_title, video.video_id))
    # Keep the computed output path, as the Chinese branch does. The original
    # rebound subs_en_srt_path to the converter's return value, which is only
    # correct if the converter happens to return that same path.
    convert_subtilte_format(
        srt_file=video.subtitle_en.path, ass_file=subs_en_srt_path)

    subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
    subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
    merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

    merge_subs_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.zh-Hans.en.srt' % (safe_title, video.video_id))
    merge_subs.save(merge_subs_path, encoding=encoding)

    video.subtitle_merge = merge_subs_path
    video.save(update_fields=['subtitle_merge'])
    return merge_subs_path
예제 #16
0
 def test_negative_serialization(self):
     """SubRipTime(-1, 2, 3, 4) serializes as '00:00:00,000'."""
     negative_time = SubRipTime(-1, 2, 3, 4)
     self.assertEqual('00:00:00,000', str(negative_time))
예제 #17
0
    def tick(self):
        """Run one lighting tick for the player's current position.

        Reads the player position (used as seconds), asks
        ``self.find_subtitle`` for a subtitle between that position and one
        ``TICK_TIME`` later (passing ``self.last_played`` as ``lo``) and,
        when a subtitle is returned, passes it to ``self.trigger_light`` and
        records its index in ``self.last_played``. Afterwards, when the DMX
        interpolator is running, ``MENU_DMX_VAL`` is None, ``self.PLAY_DMX``
        is set and a DMX device is present, the interpolated frame for the
        current position is written to the device.

        If the player position cannot be read, the error is logged,
        ``self.__del__()`` is called and the tick ends.
        """
        try:
            pp = self.player.getPosition()
        except Exception as e:
            print(
                "Could not get the current position of the player, shutting down lighting gracefully..."
            )
            logging.error(e)
            self.__del__()
            # Without a position there is nothing to look up. The original
            # fell through here and failed on the undefined `pp`.
            return

        pt = SubRipTime(seconds=pp)
        ptd = SubRipTime(seconds=(pp + 1 * TICK_TIME))

        sub, i = self.find_subtitle(self.subs,
                                    pt,
                                    ptd,
                                    lo=self.last_played)

        if DEBUG:
            print(i, "Found Subtitle for light event:", sub, i)

        # find_subtitle signals "nothing found" with an empty string.
        if sub != "":
            if LIGHTING_MSGS and DEBUG:
                print(i, "Light event:", sub)
            self.trigger_light(sub)
            self.last_played = i
            if DEBUG:
                print('last_played: ', i)

        # A set MENU_DMX_VAL means "pod mode", in which no interpolated
        # frames are written.
        pod_mode = MENU_DMX_VAL is not None

        if (self.dmx_interpolator.isRunning() and not pod_mode
                and self.PLAY_DMX and self.dmx is not None):
            iFrame = self.dmx_interpolator.getInterpolatedFrame(pt)
            self.dmx.write_frame(iFrame)
예제 #18
0
 def setUp(self):
     """Give each test a default-constructed SubRipTime as ``self.time``."""
     self.time = SubRipTime()
예제 #19
0
 def test_mul(self):
     """Multiplying ``self.time`` by 2 and by 0.5 gives the expected times.

     The expected values correspond to ``self.time`` being 01:02:03,004
     (presumably what the fixture's setUp assigns).
     """
     doubled = self.time * 2
     self.assertEqual(doubled, SubRipTime(2, 4, 6, 8))
     halved = self.time * 0.5
     self.assertEqual(halved, (0, 31, 1, 502))
예제 #20
0
 def setUp(self):
     """Give each test ``SubRipTime(1, 2, 3, 4)`` as ``self.time``."""
     self.time = SubRipTime(1, 2, 3, 4)
예제 #21
0
 def test_from_ordinal(self):
     """An ordinal of 3600000 and a one-hour SubRipTime compare as equal."""
     one_hour_ordinal = 3600000
     from_ordinal = SubRipTime.from_ordinal(one_hour_ordinal)
     self.assertEqual(from_ordinal, {'hours': 1})
     self.assertEqual(SubRipTime(1), one_hour_ordinal)
예제 #22
0
 def test_parsing(self):
     """Each KNOWN_VALUES string equals the SubRipTime built from its items."""
     for expected_text, fields in self.KNOWN_VALUES:
         built = SubRipTime(*fields)
         self.assertEqual(expected_text, built)
예제 #23
0
 def test_negative_serialization(self):
     """SubRipTime(-1, 2, 3, 4) serializes as u'00:00:00,000'."""
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(u'00:00:00,000', unicode(SubRipTime(-1, 2, 3, 4)))
예제 #24
0
 def test_serialization(self):
     """str() of the SubRipTime built from each KNOWN_VALUES entry matches its string."""
     for expected_text, fields in self.KNOWN_VALUES:
         serialized = str(SubRipTime(*fields))
         self.assertEqual(expected_text, serialized)
예제 #25
0
    print "  --delta=<milliseconds>    default: 500"
    print "  -e <encoding>             Encoding of input and output files."
    print "  --encoding=<encoding>     default: utf_8"


def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:e:',
                                   ["help", "encoding=", "delta="])
    except getopt.GetoptError, err:
        print str(err)
        usage()
        sys.exit(2)

    #Settings default values
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"
    #-

    if len(args) <> 3:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-d", "--delta"):
            delta = SubRipTime(milliseconds=int(a))
        elif o in ("-e", "--encoding"):
            encoding = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()