Example #1
0
def to_srt(df, filename):
    """Write the rows of `df` to `filename` as a UTF-8 SubRip (.srt) file.

    Each row must provide 'begin', 'end' and 'text'. Both timestamps are
    passed through `convert_time` before being stored. Cues are numbered
    sequentially from 1 in row order instead of all sharing index 0.
    """
    out = SubRipFile(encoding='utf-8')
    # The DataFrame's own index label is ignored: it need not be numeric
    # or contiguous, so the cue number comes from enumerate().
    for number, (_, row) in enumerate(df.iterrows(), start=1):
        begin = convert_time(row['begin'])
        end = convert_time(row['end'])
        out.append(SubRipItem(number, begin, end, row['text']))
    out.save(filename)
Example #2
0
def add_videos_to_index(subtitle_index, output_file, index):
	"""Index the subtitle words of every video listed in a CSV file.

	`subtitle_index` is a CSV whose rows hold, in order: title, filename,
	id, views and type. For each row the subtitle file `<filename>.en.srt`
	is read, one summary row is written to `output_file`, and every
	non-stopword is passed to `add_to_index(index, word, url)`.

	Rows that cannot be processed (missing subtitle file, short row, ...)
	are reported and skipped. Returns `index`.
	"""
	punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
	stopwords_path = '/Users/connormendenhall/Python/DaveDaveFind/DaveDaveFind/data/stopwords.csv'
	# Loaded on first use and then reused for every row. A failed load
	# leaves this as None, so the next row simply tries again.
	stopwords = None

	with open(subtitle_index, 'rb') as index_file, open(output_file, 'wt') as info_file:
		vindexReader = csv.reader(index_file)
		vinfoWriter = csv.writer(info_file)
		vinfoWriter.writerow(['title', 'filename', 'id', 'views', 'type', 'url', 'text'])
		for row in vindexReader:
			try:
				filename = row[1] + '.en.srt'
				url = 'http://www.youtube.com/watch?v=' + row[2]
				with open(filename) as subtitle_file:
					text_ascii = removeNonAscii(subtitle_file.read())
				subtitles = SubRipFile.open(filename)
				vinfoWriter.writerow([row[0], row[1], row[2], row[3], row[4], url, text_ascii])
				if stopwords is None:
					with open(stopwords_path, 'rb') as f:
						# A set gives constant-time membership tests.
						stopwords = set([''] + [entry[0] for entry in csv.reader(f)])
				for sentence in subtitles:
					for word in sentence.text.split():
						word = word.strip(punctuation).lower()
						if word not in stopwords:
							add_to_index(index, word, url)
			except Exception as error:
				# Best effort: one bad row must not abort the whole run,
				# but it should not disappear silently either.
				print("[add_videos_to_index()] Skipped row %r: %s" % (row, error))
	print("[add_videos_to_index()] Videos added.")
	return index
Example #3
0
 def test_windows1252(self):
     """A windows-1252 string parses fully and keeps its CRLF line endings."""
     # Close the fixture handle deterministically instead of leaking it.
     with codecs.open(self.windows_path, encoding='windows-1252') as srt_handle:
         srt_string = srt_handle.read()
     srt_file = SubRipFile.from_string(srt_string, encoding='windows-1252', eol='\r\n')
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(srt_file), 1332)
     self.assertEqual(srt_file.eol, '\r\n')
     self.assertRaises(UnicodeDecodeError, SubRipFile.open,
         self.utf8_path, encoding='ascii')
Example #4
0
    def mostrarSubtitulos(self, escena, ruta):
        """Show on `escena` the subtitle from file `ruta` that is due now.

        Does nothing unless ``self.ok == 1``. ``self.tmp`` is the index of
        the next subtitle to show. While the current time (pygame ticks
        minus ``self.offset``) falls inside that subtitle's interval, the
        scene is redrawn and the text printed once. After the interval the
        text is cleared once. Reaching the end of the file rewinds
        ``self.tmp`` and sets ``self.ok`` to 0.
        """
        if self.ok != 1:
            return

        self.escena = escena

        # UTF-8 decoding keeps accented characters intact.
        subs = SubRipFile.open(ruta, encoding='UTF-8')

        if self.tmp == len(subs):
            # Reached the end of the subtitles: rewind and stop displaying.
            self.tmp = 0
            self.ok = 0
            self.tiempoActual = 0

        if self.tmp >= len(subs):
            # Nothing to index, e.g. an empty subtitle file.
            return

        linea = subs[self.tmp]
        # `ordinal` is the whole timestamp in milliseconds, hours included,
        # so subtitles after the first hour are timed correctly.
        tics_ini = linea.start.ordinal
        tics_fin = linea.end.ordinal
        # Sample the clock once so both bounds are checked against the
        # same instant.
        ahora = pygame.time.get_ticks() - self.offset

        if tics_ini <= ahora <= tics_fin:
            if self.imprimir == 1:
                self.escena.draw()           # redraw the scene
                self.printTexto(linea.text)  # print the subtitle text
                self.imprimir = 0
                self.tmp = self.tmp + 1
                self.entrar = 1
        elif self.entrar == 1:
            # Outside the interval: clear the text once.
            self.printTexto("")
            self.imprimir = 1
            self.entrar = 0
    def convert(content, input_format, output_format):
        """
        Translate transcript `content` from `input_format` to `output_format`.

        `input_format` must be 'srt' or 'sjson'; `output_format` must be
        'txt', 'srt' or 'sjson'. Content is returned untouched when both
        formats are the same. srt -> sjson is not implemented and raises
        NotImplementedError.
        """
        assert input_format in ('srt', 'sjson')
        assert output_format in ('txt', 'srt', 'sjson')

        if input_format == output_format:
            return content

        conversion = (input_format, output_format)

        if conversion == ('srt', 'txt'):
            subtitles = SubRipFile.from_string(content.decode('utf8'))
            return HTMLParser().unescape(subtitles.text)

        if conversion == ('srt', 'sjson'):
            raise NotImplementedError

        if conversion == ('sjson', 'txt'):
            lines = json.loads(content)['text']
            return HTMLParser().unescape("\n".join(lines))

        if conversion == ('sjson', 'srt'):
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
    def convert(content, input_format, output_format):
        """
        Translate transcript `content` from `input_format` to `output_format`.

        `input_format` must be 'srt' or 'sjson'; `output_format` must be
        'txt', 'srt' or 'sjson'. Content is returned untouched when both
        formats are the same.

        Raises:
            TranscriptsGenerationException: When the srt content cannot be
                parsed during an srt -> sjson conversion.
        """
        assert input_format in ('srt', 'sjson')
        assert output_format in ('txt', 'srt', 'sjson')

        if input_format == output_format:
            return content

        conversion = (input_format, output_format)

        if conversion == ('srt', 'txt'):
            subtitles = SubRipFile.from_string(content.decode('utf8'))
            return HTMLParser().unescape(subtitles.text)

        if conversion == ('srt', 'sjson'):
            try:
                # ERROR_RAISE makes pysrt raise on malformed input instead
                # of skipping it. 'utf-8-sig' drops a leading byte order mark.
                srt_subs = SubRipFile.from_string(
                    content.decode('utf-8-sig'),
                    error_handling=SubRipFile.ERROR_RAISE
                )
            except Error as ex:   # Base exception from pysrt
                raise TranscriptsGenerationException(text_type(ex))
            return json.dumps(generate_sjson_from_srt(srt_subs))

        if conversion == ('sjson', 'txt'):
            lines = json.loads(content)['text']
            # Falsy entries (such as None) become empty lines.
            printable = [line if line else '' for line in lines]
            return HTMLParser().unescape("\n".join(printable))

        if conversion == ('sjson', 'srt'):
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
Example #7
0
    def input_file(self):
        """Return the parsed subtitle file named by ``self.arguments.file``.

        The first call detects the file's encoding from its raw bytes,
        parses it with pysrt (parse errors are logged, not raised) and
        caches the result on ``self._source_file``. Later calls return
        the cached object.
        """
        if hasattr(self, '_source_file'):
            return self._source_file

        with open(self.arguments.file, 'rb') as raw:
            detected = detect(raw.read()).get('encoding')
            charset = self.normalize_encoding(detected)

        self._source_file = SubRipFile.open(
            self.arguments.file,
            encoding=charset,
            error_handling=SubRipFile.ERROR_LOG
        )
        return self._source_file
Example #8
0
    def save(self, path):
        """Write the stored elements to `path`.

        A path ending in 'srt' produces a SubRip file (pysrt is required)
        whose cues are numbered sequentially from 1. Any other path gets a
        tab-separated table with the header 'onset', 'text', 'duration'.
        """
        if path.endswith('srt'):
            verify_dependencies(['pysrt'])
            from pysrt import SubRipFile, SubRipItem
            from datetime import time

            out = SubRipFile()
            # Number the cues; previously every cue was written as index 0.
            for number, elem in enumerate(self._elements, start=1):
                start = time(*self._to_tup(elem.onset))
                end = time(*self._to_tup(elem.onset + elem.duration))
                out.append(SubRipItem(number, start, end, elem.text))
            out.save(path)
        else:
            with open(path, 'w') as f:
                f.write('onset\ttext\tduration\n')
                for elem in self._elements:
                    f.write('{}\t{}\t{}\n'.format(elem.onset,
                                                  elem.text,
                                                  elem.duration))
Example #9
0
    def test_eol_conversion(self):
        """Saving with an LF eol rewrites CRLF line endings as LF."""
        import io

        # io.open with its default newline handling records the line
        # endings it meets in `.newlines`. The legacy 'rU' mode was removed
        # in Python 3.11, and the handles are now closed deterministically.
        with io.open(self.windows_path, encoding='windows-1252') as input_file:
            input_file.read()
            self.assertEqual(input_file.newlines, '\r\n')

        srt_file = SubRipFile.open(self.windows_path, encoding='windows-1252')
        srt_file.save(self.temp_path, eol='\n')

        with io.open(self.temp_path, encoding='windows-1252') as output_file:
            output_file.read()
            self.assertEqual(output_file.newlines, '\n')
Example #10
0
    def test_eol_conversion(self):
        """Check that saving with eol set to LF converts a CRLF file."""
        import io

        # 'rU' no longer exists on Python 3.11+. io.open tracks newlines by
        # default on both Python 2 and 3, and `with` closes each handle.
        with io.open(self.windows_path, encoding="windows-1252") as source:
            source.read()
            self.assertEqual(source.newlines, "\r\n")

        srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
        srt_file.save(self.temp_path, eol="\n")

        with io.open(self.temp_path, encoding="windows-1252") as result:
            result.read()
            self.assertEqual(result.newlines, "\n")
Example #11
0
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge two srt subtitles written in different languages.

    The two files do not share a timeline, so the merged result starts a
    new cue whenever either file changes. The two languages therefore do
    not always switch at the same moment; this cannot be avoided.

    See https://github.com/byroot/pysrt/issues/17

    https://github.com/byroot/pysrt/issues/15

    :param sub_a: load it with sub_a = SubRipFile.open(sub_a_path, encoding=encoding)
    :param sub_b: the second subtitle file, loaded the same way
    :param delta: intervals that are not longer than this are dropped
    :param encoding: not used by this function
    :return: a new SubRipFile holding the merged cues
    """
    # Every start and end time of both files is a potential cue boundary.
    boundaries = sorted(
        mark
        for track in (sub_a, sub_b)
        for cue in track
        for mark in (cue.start.ordinal, cue.end.ordinal)
    )

    merged = SubRipFile()
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)

        if not (end - start) > delta:
            continue

        text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)

        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
Example #12
0
def get_captions(client_name, clip_id):
    """Fetch a clip's captions, save them as an .srt file and push it to S3.

    Returns ``(captions, s3_url)`` where `captions` is a list of
    ``{'time': seconds, 'text': str}`` dicts. Returns ``([], '')`` when the
    response status is not 200 or the payload cannot be parsed, and
    ``None`` when the server answers with a bad status line.
    """
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % (client_name, clip_id)
    print("Fetching URL: %s" % g_url)

    try:
        response, j = h.request(g_url)
    except httplib.BadStatusLine:
        return None

    if response.get('status') != '200':
        return ([], '')

    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()
    captions = []

    try:
        j = json.loads(j, strict=False)[0]
    except ValueError:
        # The payload is not strict JSON: quote the bare lower-case keys
        # and drop the backslashes before trying again.
        ts = re.sub(r'([{,]\s+)([a-z]+)(: ")',
                    lambda s: '%s"%s"%s' % s.groups(), j).replace("\\", "")
        try:
            j = json.loads(ts, strict=False)[0]
        except UnicodeDecodeError:
            ts = unicode(ts, errors='ignore')
            j = json.loads(ts, strict=False)[0]
    except Exception:
        # Nothing usable came back. Previously `j` became False here and
        # the loop below crashed with a TypeError.
        return ([], '')

    # sub_count is the position among all items, not only the text ones.
    for sub_count, item in enumerate(j):
        if item["type"] != "text":
            continue
        cap = item["text"]
        offset = round(float(item["time"]), 3)
        captions.append({'time': offset, 'text': cap})
        end = get_cap_end(j, sub_count)
        if end:
            subs.append(SubRipItem(index=sub_count,
                                   start=SubRipTime(seconds=offset),
                                   end=SubRipTime(seconds=end),
                                   text=cap))

    # Create the target directory directly rather than through a shell
    # command built from `client_name`.
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
    subs.save(path=filename, encoding="utf-8")

    s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
    return (captions, s3_url)
Example #13
0
    def save(self, *args, **kwargs):
        """Save the episode and rebuild its subtitle rows from the file."""
        super(Episode, self).save(*args, **kwargs)

        # Parse first: an unreadable subtitle file must not wipe the
        # subtitles that are already stored.
        subs = SubRipFile.open(self.subtitles.path)

        # Delete and re-import in one transaction so a failure half-way
        # through leaves the previous subtitles in place.
        with transaction.commit_on_success():
            self.subtitle_set.all().delete()
            for sub in subs:
                self.subtitle_set.create(
                    start=sub.start.ordinal, end=sub.end.ordinal,
                    text=sub.text)
Example #14
0
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle files into one SubRipFile.

    Every start and end time of either file is a boundary. For each pair
    of neighbouring boundaries further apart than `delta`, the texts found
    in both files for that interval are joined into one new cue. Intervals
    whose joined text is empty are left out.
    """
    marks = []
    for track in (sub_a, sub_b):
        marks += [cue.start.ordinal for cue in track]
        marks += [cue.end.ordinal for cue in track]
    marks.sort()

    merged = SubRipFile()
    cursor_a = cursor_b = 0
    for previous, current in zip(marks, marks[1:]):
        start = SubRipTime.from_ordinal(previous)
        end = SubRipTime.from_ordinal(current)

        if not (end - start) > delta:
            continue

        text_a, cursor_a = find_subtitle(sub_a, start, end, cursor_a)
        text_b, cursor_b = find_subtitle(sub_b, start, end, cursor_b)

        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
  def GetSrtCaptions(self):
    """Downloads the ASR captions track as SRT and parses it with pysrt.

    The track is requested from self.track_url with fmt=srt. On success the
    parsed captions are stored in self.srt_captions.

    Raises:
      Error: The server answered with a status other than 200.
    """
    url = "%s?fmt=srt" % self.track_url
    response_headers, body = self.http.request(url, "GET", headers=self.headers)

    if response_headers["status"] != "200":
      raise Error("Received HTTP response %s when requesting %s?fmt=srt." % (response_headers["status"], self.track_url))

    self.srt_captions = SubRipFile.from_string(body)
def generate_subs_from_source(speed_subs, subs_type, subs_filedata, item, language='en'):
    """Generate transcripts from a source file and store them for `item`.

    The source subs are expected to have a speed of 1.

    :param speed_subs: dictionary {speed: sub_id, ...}
    :param subs_type: type of source subs; only "srt" (any case) is accepted.
    :param subs_filedata: unicode, content of source subs.
    :param item: module object.
    :param language: str, language of translation of transcripts
    :returns: dict with the parallel lists 'start', 'end' and 'text' taken
        from the source subs.
    :raises TranscriptsGenerationException: for a non-srt type, or when
        parsing fails or yields no subtitles.
    """
    _ = item.runtime.service(item, "i18n").ugettext

    if subs_type.lower() != 'srt':
        raise TranscriptsGenerationException(_("We support only SubRip (*.srt) transcripts format."))

    try:
        srt_subs_obj = SubRipFile.from_string(subs_filedata)
    except Exception as ex:
        msg = _("Something wrong with SubRip transcripts file during parsing. Inner message is {error_message}").format(
            error_message=ex.message
        )
        raise TranscriptsGenerationException(msg)

    if not srt_subs_obj:
        raise TranscriptsGenerationException(_("Something wrong with SubRip transcripts file during parsing."))

    subs = {
        'start': [sub.start.ordinal for sub in srt_subs_obj],
        'end': [sub.end.ordinal for sub in srt_subs_obj],
        # Each cue is flattened onto a single line.
        'text': [sub.text.replace('\n', ' ') for sub in srt_subs_obj],
    }

    for speed, subs_id in speed_subs.iteritems():
        scaled = generate_subs(speed, 1, subs)
        save_subs_to_store(scaled, subs_id, item, language)

    return subs
Example #17
0
File: utils.py Project: edx/edx-val
def get_transcript_format(transcript_content):
    """
    Work out whether the transcript content is SJSON or SRT.

    Content that parses as JSON is reported as SJSON. Otherwise it is
    parsed as SRT, and SRT is reported when at least one subtitle is found.
    Anything else falls back to SJSON.

    Arguments:
        transcript_content (str): Transcript file content.
    """
    try:
        json.loads(transcript_content)
    except ValueError:
        pass
    else:
        return TranscriptFormat.SJSON

    # ERROR_RAISE makes pysrt raise when the transcript cannot be parsed,
    # so a malformed transcript surfaces as an exception here.
    srt_subs = SubRipFile.from_string(transcript_content, error_handling=SubRipFile.ERROR_RAISE)
    if len(srt_subs) > 0:
        return TranscriptFormat.SRT
    return TranscriptFormat.SJSON
Example #18
0
def generate_subs_from_source(speed_subs, subs_type, subs_filedata, item):
    """Generate transcripts from a source file and store them for `item`.

    The source subs are expected to have a speed of 1.

    :param speed_subs: dictionary {speed: sub_id, ...}
    :param subs_type: type of source subs; only "srt" is accepted.
    :param subs_filedata: unicode, content of source subs.
    :param item: module object.
    :returns: dict with the parallel lists 'start', 'end' and 'text' taken
        from the source subs.
    :raises TranscriptsGenerationException: for a non-srt type, or when
        parsing fails or yields no subtitles.
    """
    if subs_type != 'srt':
        raise TranscriptsGenerationException("We support only SubRip (*.srt) transcripts format.")

    try:
        srt_subs_obj = SubRipFile.from_string(subs_filedata)
    except Exception as e:
        raise TranscriptsGenerationException(
            "Something wrong with SubRip transcripts file during parsing. "
            "Inner message is {}".format(e.message)
        )

    if not srt_subs_obj:
        raise TranscriptsGenerationException("Something wrong with SubRip transcripts file during parsing.")

    subs = {
        'start': [sub.start.ordinal for sub in srt_subs_obj],
        'end': [sub.end.ordinal for sub in srt_subs_obj],
        # Each cue is flattened onto a single line.
        'text': [sub.text.replace('\n', ' ') for sub in srt_subs_obj],
    }

    for speed, subs_id in speed_subs.iteritems():
        scaled = generate_subs(speed, 1, subs)
        save_subs_to_store(scaled, subs_id, item)

    return subs
Example #19
0
    def generate_vocap_file(self):
        """Convert the SRT file into the vocap format.

        Reads ``self.path/self.srt_file`` and writes one line per cue to
        ``self.path/self.vocap_file``, shaped as
        ``###<start in whole seconds> <text>``. Milliseconds are dropped.
        """
        subs = SubRipFile.open(self.path+"/"+self.srt_file, encoding="utf-8")
        # `with` closes the output even when writing a cue fails.
        with codecs.open(self.path+"/"+self.vocap_file, "w", "utf-8") as fileobj:
            for sub in subs:
                # "###" starts a record, so defuse it inside the text, and
                # keep each cue on a single line.
                text = sub.text.replace(u"###", u"#.#.#").replace(u"\n", u" ")
                start = (sub.start.seconds
                         + 60*sub.start.minutes
                         + 3600*sub.start.hours)
                fileobj.write(u"###%d %s\n" % (start, text))
Example #20
0
	def __init__(self, filename):
		"""Load the subtitle file `filename` into a Gtk list model.

		The file is parsed with pysrt's default encoding first. If that
		fails with a decoding error it is parsed again as ISO-8859-1.
		Each parsed subtitle is appended to ``self.model`` as
		``[item, item.text]``.

		Raises FileNameError when `filename` does not exist.
		"""
		self.filename = filename

		self.model = Gtk.ListStore(object, str)
		self.srt_model = []
		if not os.path.exists(filename):
			raise FileNameError(filename)

		try:
			self.srt_model = SubRipFile.open(path=filename)
		except UnicodeDecodeError as unic:
			debug(unic)
			try:
				info("trying ...", "ISO-8859-1")
				# SubRipFile(...) only builds an empty container; it takes
				# SubRipFile.open(...) to actually read the file.
				self.srt_model = SubRipFile.open(path=filename, encoding="iso-8859-1")
			except Exception as excep:
				debug(excep)
				self.model = None
		except IOError as error:
			info("Impossible de lire le fichier de sous titre: error {}".format(error))

		# srt_model is still the empty list when nothing could be parsed,
		# so this loop is skipped in that case.
		for line in self.srt_model:
			self.model.append([line, line.text])
def get_srt_data(source):
    """Create, or fetch if already present, one caption Note per SRT cue.

    The cues are parsed from ``source.srt_data``. Each Note copies the
    user, video, name and url of `source`.
    """
    def _as_seconds(moment):
        # Hours are the largest unit a cue time is expected to carry.
        return moment.second + (moment.minute * 60) + (moment.hour * 60 * 60) + (moment.microsecond / 1000000)

    for caption in SubRipFile.from_string(source.srt_data):
        # NOTE(review): the start offset is computed but never stored; the
        # Note's `offset` receives the END of the cue. Confirm that this is
        # intended.
        start_offset = _as_seconds(caption.start.to_time())
        end_offset = _as_seconds(caption.end.to_time())

        Note.objects.get_or_create(
            text=caption.text,
            offset=end_offset,
            user=source.user,
            user_name=source.user.username,
            video=source.video,
            private=False,
            import_source=source,
            import_source_name=source.name,
            source='SRT File',
            original_source='SRT File',
            source_link=source.url,  # probably empty for an uploaded file
            type="caption"
        )
Example #22
0
 def test_empty_file(self):
     """An empty input parses without error into zero items."""
     # Named srt_file so the `file` builtin is not shadowed.
     srt_file = SubRipFile.open('/dev/null',
                                error_handling=SubRipFile.ERROR_RAISE)
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(srt_file), 0)
Example #23
0
 def __test_encoding(self, encoding):
     """Open the fixture file named `encoding` and check its contents.

     The fixture must hold 7 items and its first item must have index 1.
     """
     srt_file = SubRipFile.open(os.path.join(self.base_path, encoding))
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(srt_file), 7)
     self.assertEqual(srt_file[0].index, 1)
Example #24
0
 def test_length(self):
     """The capability tester fixture parses into 37 items."""
     path = os.path.join(self.base_path, 'capability_tester.srt')
     # Named srt_file so the `file` builtin is not shadowed.
     srt_file = SubRipFile.open(path)
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(srt_file), 37)
import sys, pytesseract
from pgsreader import PGSReader
from imagemaker import make_image

from pysrt import SubRipFile, SubRipItem, SubRipTime

from tqdm import tqdm

supFile = sys.argv[1]
pgs = PGSReader(supFile)

srtFile = ".".join(supFile.split('.')[:-1])+".srt"
with open('myfile.txt', 'w') as fp: 
    pass

srt = SubRipFile()

# get all DisplaySets that contain an image
print("Loading DisplaySets...")
allsets = [ds for ds in tqdm(pgs.iter_displaysets())]

print(f"Running OCR on {len(allsets)} DisplaySets and building SRT file...")
subText = ""
subStart = 0
subIndex = 0
for ds in tqdm(allsets):
    try:
        if ds.has_image:
            # get Palette Display Segment
            pds = ds.pds[0]
            # get Object Display Segment
Example #26
0
 def setUp(self):
     """Parse the UTF-8 fixture and keep it on ``self.file`` for the tests."""
     fixture = os.path.join(file_path, 'tests', 'static', 'utf-8.srt')
     self.file = SubRipFile.open(fixture)
Example #27
0
 def test_utf8(self):
     """The UTF-8 fixture opens fully; the Windows one fails to decode as UTF-8."""
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(SubRipFile.open(self.utf8_path)), 1332)
     self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.windows_path, encoding="utf_8")
Example #28
0
 def test_utf8(self):
     """from_string parses UTF-8 text and rejects the raw Windows content."""
     # Both fixture handles are closed deterministically instead of leaked.
     with codecs.open(self.utf8_path, encoding="utf_8") as utf8_file:
         unicode_content = utf8_file.read()
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(SubRipFile.from_string(unicode_content)), 1332)
     with open(self.windows_path) as windows_file:
         raw_content = windows_file.read()
     self.assertRaises(UnicodeDecodeError, SubRipFile.from_string, raw_content)
Example #29
0
 def test_utf8(self):
     """from_string parses UTF-8 text and rejects the raw Windows content."""
     # Read both fixtures through `with` so no handle is left open.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_file:
         unicode_content = utf8_file.read()
     with open(self.windows_path) as windows_file:
         raw_content = windows_file.read()
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(SubRipFile.from_string(unicode_content)), 1332)
     self.assertRaises(UnicodeDecodeError, SubRipFile.from_string,
                       raw_content)
Example #30
0
    except getopt.GetoptError, err:
        print str(err)
        usage()
        sys.exit(2)

    #Settings default values
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"
    #-

    if len(args) <> 3:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-d", "--delta"):
            delta = SubRipTime(milliseconds=int(a))
        elif o in ("-e", "--encoding"):
            encoding = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()

    subs_a = SubRipFile.open(args[0], encoding=encoding)
    subs_b = SubRipFile.open(args[1], encoding=encoding)
    out = merge_subtitle(subs_a, subs_b, delta)
    out.save(args[2], encoding=encoding)


if __name__ == "__main__":
    main()
Example #31
0
from pysrt import SubRipFile, SubRipTime

# TODO: still need to work out how to use SubRipTime, which should make
# reading the timings easier.

subs = SubRipFile.open("14Blades.srt", encoding="iso-8859-1")

print("Hay", len(subs), " subtitulos")

# The first two subtitles start within the first minute, so seconds suffice.
for numero in (0, 1):
    linea = subs[numero]
    print(linea.text)
    print("inicio", linea.start.seconds, " segundos.")
    print("fin", linea.end.seconds, " segundos.")

linea = subs[14]
print(linea.text)
print("inicio", linea.start.minutes, "minutos con", linea.start.seconds, "segundos.")
# The end line must report the END minutes; it used to print start.minutes.
print("fin", linea.end.minutes, "minutos con", linea.end.seconds, "segundos.")

# equivalent
# part = subs.slice(ends_after=SubRipTime(0, 0, 40))
# part = subs.slice(ends_after=(0, 0, 40))
# part = subs.slice(ends_after={'seconds': 40})

# part.shift(seconds=-2)
# subs.save('other/path.srt', 'utf-8');
Example #32
0
 def test_blank_lines(self):
     """Streaming nothing but blank lines yields no items and no error."""
     items = list(
         SubRipFile.stream([u'\n'] * 20,
                           error_handling=SubRipFile.ERROR_RAISE))
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(len(items), 0)
Example #33
0
 def test_single_item(self):
     """The text of a one-item file is the text of that item."""
     srt_file = SubRipFile(
         [SubRipItem(1, {'seconds': 1}, {'seconds': 2}, 'Hello')])
     # assertEqual replaces the assertEquals alias removed in Python 3.12.
     self.assertEqual(srt_file.text, 'Hello')
Example #34
0
def syncSrts(subs_L1, subs_L2):
    """Sync subs_L1 by subs_L2 timings.

    Walks subs_L2 in order. For each of its subtitles, `matchSubtitle`
    looks for a subtitle in subs_L1 and a new item with the L2 timing and
    the matched L1 text is emitted. When nothing matches, the L2 text is
    used and a "Missing" line is printed.

    Returns a 4-tuple ``(out, dupes, fixed, subs_L2_out)``:
      out          -- SubRipFile with the re-timed L1 subtitles
      dupes        -- how often the same L1 index matched twice in a row
      fixed        -- how many repeated lines `trySplitLine` repaired
      subs_L2_out  -- SubRipFile of L2 items kept parallel to `out`
    """

    out = SubRipFile()
    subs_L2_out = SubRipFile()

    j = 0
    last_j = -1   # index of the previous L1 match; -1 until a first match
    dupes = 0
    L2_ind = -1   # position of the current L2 subtitle, advanced manually

    for L2_sub in subs_L2:
        L2_ind = L2_ind + 1
        start = L2_sub.start
        end = L2_sub.end
        # The search starts from the previous match (or 0); a result
        # below 0 is treated as "no match".
        j = matchSubtitle(subs_L1, start, end, max(last_j, 0))
        L1_sub = subs_L1[j] if (j > -1) else None

        if L1_sub is None:
            # No L1 counterpart: fall back to the L2 text.
            text = L2_sub.text
            print("---- Missing: {}: {}".format(
                L2_sub.index, L2_sub.text.replace("\n", "[[NL]]")))
        else:
            text = L1_sub.text

            if j - 1 > last_j and last_j > -1:
                # we skipped a sub in L1_subs
                if isSubMatch(subs_L1[j - 1], subs_L2[L2_ind - 1].start,
                              subs_L2[L2_ind - 1].end):
                    # The skipped L1 sub matches the previous L2 sub:
                    # append its text to the item emitted last.
                    out[len(out) -
                        1].text = out[len(out) -
                                      1].text + "\n" + subs_L1[j - 1].text
                elif isSubMatch(subs_L1[j - 1], start, end):
                    # It matches the current L2 sub: prepend its text.
                    text = subs_L1[j - 1].text + "\n" + text
                else:
                    # A sub line in L1 does not match any in L2
                    # We add it to synced L1, and add an empty one to subs L2
                    item = SubRipItem(0, subs_L1[j - 1].start,
                                      subs_L1[j - 1].end, subs_L1[j - 1].text)
                    out.append(item)
                    item2 = SubRipItem(0, subs_L1[j - 1].start,
                                       subs_L1[j - 1].end, " ")
                    subs_L2_out.append(item2)

            if j == last_j:
                # The same L1 sub matched two L2 subs in a row.
                dupes = dupes + 1
                #print("---- OOPS. {}: {} - {}".format(L2_sub.index, L2_sub.text.replace("\n",""), L1_sub.text.replace("\n","")))
            last_j = j

        item = SubRipItem(0, start, end, text)
        out.append(item)

        item2 = SubRipItem(0, start, end, L2_sub.text)
        subs_L2_out.append(item2)

    out.clean_indexes()
    subs_L2_out.clean_indexes()

    # Second pass: neighbouring output items with the same L1 text but
    # different L2 texts are handed to trySplitLine.
    fixed = 0
    for i in range(1, len(out)):
        sub1 = out[i - 1].text
        sub2 = out[i].text
        if ((sub1 == sub2)
                and (subs_L2_out[i - 1].text != subs_L2_out[i].text)):
            if (trySplitLine(out, i, sub1)):
                fixed = fixed + 1
                # NOTE(review): rebinding `i` does not skip an iteration of
                # a `for ... in range` loop; the next pass still gets the
                # following index. Confirm whether a skip was intended.
                i = i + 1
            else:
                print("---- Oy. {}: {} not fixed".format(
                    i, sub1.replace("\n", "[[NL]]")))

    return out, dupes, fixed, subs_L2_out
Example #35
0
    def export_subtitle(source_file_path: str, subs: List[SubRipItem], target_file_path: str, frame_rate: float = 25.0) -> None:
        """Export subtitle in the format determined by the source file extension.

        SubRip and STL targets are written directly. TTML is produced by
        re-timing the cues of the original TTML document. Every other
        format is written as a temporary SubRip file first and then
        converted by the matching `Utils.srt2*` helper.

        Arguments:
            source_file_path {string} -- The path to the original subtitle file.
            subs {list} -- A list of SubRipItems.
            target_file_path {string} -- The path to the exported subtitle file.
            frame_rate {float} -- The frame rate for frame-based subtitle formats {default: 25.0}.

        Raises:
            UnsupportedFormatException -- The source file extension is not recognised.
        """

        encoding = Utils.detect_encoding(source_file_path)
        _, file_extension = os.path.splitext(source_file_path.lower())

        def save_as_srt(path: str) -> None:
            """Write `subs` to `path` in SubRip format."""
            SubRipFile(subs).save(path, encoding=encoding)

        def convert_via_srt(converter, **options) -> None:
            """Write a temporary SubRip file and convert it to the target."""
            # mkstemp hands back an OPEN descriptor; close it straight away
            # so it is not leaked. It is called before `try` so `path` is
            # always bound when `finally` runs.
            handle, path = tempfile.mkstemp()
            os.close(handle)
            try:
                save_as_srt(path)
                converter(path, target_file_path, **options)
            finally:
                os.remove(path)

        if file_extension in Subtitle.SUBRIP_EXTENTIONS:
            save_as_srt(target_file_path)
            Utils.remove_trailing_newlines(target_file_path, encoding)
        elif file_extension in Subtitle.TTML_EXTENSIONS:
            tree = ElementTree.parse(source_file_path)
            tt = tree.getroot()
            cues = (tt.find("tt:body", Subtitle.TT_NS).find("tt:div", Subtitle.TT_NS).findall("tt:p", Subtitle.TT_NS))  # type: ignore
            for index, cue in enumerate(cues):
                cue.attrib["begin"] = str(subs[index].start).replace(",", ".")
                cue.attrib["end"] = str(subs[index].end).replace(",", ".")

            # Change single quotes in the XML header to double quotes
            with open(target_file_path, "w", encoding=encoding) as target:
                if "xml_declaration" in inspect.getfullargspec(ElementTree.tostring).kwonlyargs:  # for >= python 3.8
                    encoded = ElementTree.tostring(tt, encoding=encoding, method="xml", xml_declaration=True)
                else:
                    encoded = ElementTree.tostring(tt, encoding=encoding, method="xml")
                normalised = encoded.decode(encoding) \
                    .replace("<?xml version='1.0' encoding='", '<?xml version="1.0" encoding="',) \
                    .replace("'?>", '"?>')
                target.write(normalised)
        elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
            convert_via_srt(Utils.srt2vtt)
        elif file_extension in Subtitle.SSA_EXTENTIONS:
            convert_via_srt(Utils.srt2ssa)
        elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
            convert_via_srt(Utils.srt2ass)
        elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
            convert_via_srt(Utils.srt2microdvd, frame_rate=frame_rate)
        elif file_extension in Subtitle.MPL2_EXTENSIONS:
            convert_via_srt(Utils.srt2mpl2)
        elif file_extension in Subtitle.TMP_EXTENSIONS:
            convert_via_srt(Utils.srt2tmp)
        elif file_extension in Subtitle.SAMI_EXTENSIONS:
            convert_via_srt(Utils.srt2sami)
        elif file_extension in Subtitle.STL_EXTENSIONS:
            # STL is saved directly; no temporary file is needed.
            save_as_srt(target_file_path)
        else:
            raise UnsupportedFormatException(
                "Unknown subtitle format for file: {}".format(source_file_path)
            )
Example #36
0
    def save_subs_as_target_format(subs: List[SubRipItem], source_file_path: str, target_file_path: str) -> None:
        """Save SubRipItems with the format determined by the target file extension.

        SubRip and STL targets are written directly. Every other format is
        written as a temporary SubRip file first and then converted by the
        matching `Utils.srt2*` helper. The encoding is detected from the
        source file.

        Arguments:
            subs {list} -- A list of SubRipItems.
            source_file_path {string} -- The path to the original subtitle file.
            target_file_path {string} -- The path to the output subtitle file.

        Raises:
            UnsupportedFormatException -- The target file extension is not recognised.
        """

        encoding = Utils.detect_encoding(source_file_path)
        _, file_extension = os.path.splitext(target_file_path.lower())

        def save_as_srt(path: str) -> None:
            """Write `subs` to `path` in SubRip format."""
            SubRipFile(subs).save(path, encoding=encoding)

        def convert_via_srt(converter) -> None:
            """Write a temporary SubRip file and convert it to the target."""
            # mkstemp hands back an OPEN descriptor; close it straight away
            # so it is not leaked. It is called before `try` so `path` is
            # always bound when `finally` runs.
            handle, path = tempfile.mkstemp()
            os.close(handle)
            try:
                save_as_srt(path)
                converter(path, target_file_path)
            finally:
                os.remove(path)

        if file_extension in Subtitle.SUBRIP_EXTENTIONS:
            save_as_srt(target_file_path)
            Utils.remove_trailing_newlines(target_file_path, encoding)
        elif file_extension in Subtitle.TTML_EXTENSIONS:
            convert_via_srt(Utils.srt2ttml)
        elif file_extension in Subtitle.WEBVTT_EXTENSIONS:
            convert_via_srt(Utils.srt2vtt)
        elif file_extension in Subtitle.SSA_EXTENTIONS:
            convert_via_srt(Utils.srt2ssa)
        elif file_extension in Subtitle.ADVANCED_SSA_EXTENTIONS:
            convert_via_srt(Utils.srt2ass)
        elif file_extension in Subtitle.MICRODVD_EXTENSIONS:
            convert_via_srt(Utils.srt2microdvd)
        elif file_extension in Subtitle.MPL2_EXTENSIONS:
            convert_via_srt(Utils.srt2mpl2)
        elif file_extension in Subtitle.TMP_EXTENSIONS:
            convert_via_srt(Utils.srt2tmp)
        elif file_extension in Subtitle.SAMI_EXTENSIONS:
            convert_via_srt(Utils.srt2sami)
        elif file_extension in Subtitle.STL_EXTENSIONS:
            # STL is saved directly; no temporary file is needed.
            save_as_srt(target_file_path)
        else:
            raise UnsupportedFormatException(
                "Unknown subtitle format for file: {}".format(source_file_path)
            )
Example #37
0
def main():
    """Parse the command line, connect the enabled outputs and serve OSC.

    Command-line values overwrite the module-level settings
    ``SRT_FILENAME``, ``MAX_BRIGHTNESS``, ``INTERVAL``, ``DMX_INTERVAL``,
    ``TRANSITION_TIME`` and ``HUE_IP_ADDRESS``.

    * If ``SRT_FILENAME`` is not empty, ``srtFile`` is replaced by a new
      ``SubRipFile`` bound to that path.
    * If ``PLAY_HUE`` is set, the Hue bridge is connected, every light is
      switched on at ``MAX_BRIGHTNESS`` and ``hue_list`` is rebuilt.
    * If ``PLAY_DMX`` is set, devices are enumerated over ``ipcon`` and the
      first DMX Bricklet found is stored in ``dmx``.

    Finally ``/hue<N>`` and ``/dmx<N>`` (N in 0..511) are mapped to the
    recording callbacks and the OSC server runs ``serve_forever()``.
    """
    global hue_list, bridge, SRT_FILENAME, HUE_IP_ADDRESS, MAX_BRIGHTNESS
    global DMX_INTERVAL, INTERVAL, TRANSITION_TIME, DEBUG, VERBOSE
    global subs, srtFile
    global ipcon, tfIDs, dmx

    banner = Figlet(font='standard')
    print(banner.renderText('LushRoom'))
    print(banner.renderText('OSC live record'))

    # ``type=`` makes argparse reject malformed numbers with a usage error
    # instead of a ValueError traceback after parsing.
    parser = argparse.ArgumentParser()
    parser.add_argument("--ip",
                        default="127.0.0.1",
                        help="OSC ip address to listen to")
    parser.add_argument("--port",
                        type=int,
                        default=8000,
                        help="OSC port to listen to")
    parser.add_argument("-s",
                        "--srt",
                        default=SRT_FILENAME,
                        help=".srt file name for lighting events")
    parser.add_argument("-b",
                        "--brightness",
                        type=int,
                        default=MAX_BRIGHTNESS,
                        help="maximum brightness")
    parser.add_argument("-i",
                        "--interval",
                        type=float,
                        default=INTERVAL,
                        help="sampling interval for Philips Hue events")
    parser.add_argument("-d",
                        "--dmx_interval",
                        type=float,
                        default=DMX_INTERVAL,
                        help="sampling interval for DMX events")
    parser.add_argument("-t",
                        "--transition_time",
                        type=float,
                        default=TRANSITION_TIME,
                        help="transition time between Philips Hue events")
    parser.add_argument("--hue",
                        default=HUE_IP_ADDRESS,
                        help="Philips Hue bridge IP address")

    args = parser.parse_args()

    print(args)

    # argparse applies ``type=`` only to string defaults, so non-string
    # module defaults are still coerced here, exactly as before.
    MAX_BRIGHTNESS = int(args.brightness)
    SRT_FILENAME = args.srt
    INTERVAL = float(args.interval)
    DMX_INTERVAL = float(args.dmx_interval)
    TRANSITION_TIME = float(args.transition_time)
    HUE_IP_ADDRESS = args.hue

    if SRT_FILENAME != "":
        print("Start recording the %s subtitles track for light events." %
              SRT_FILENAME)
        srtFile = SubRipFile(path=SRT_FILENAME)

    if PLAY_HUE:
        bridge = Bridge(HUE_IP_ADDRESS)
        bridge.connect()
        bridge.get_api()
        lights = bridge.lights
        for light in lights:
            print(light.name)
        for light in lights:
            light.on = True
            light.brightness = MAX_BRIGHTNESS

        light_names = bridge.get_light_objects('name')
        print("Light names:", light_names)

        hue_list = hue_build_lookup_table(lights)
        print(hue_list)

    if PLAY_DMX:
        ipcon.connect(HOST, PORT)

        # Register Enumerate Callback
        ipcon.register_callback(IPConnection.CALLBACK_ENUMERATE, cb_enumerate)

        # Trigger Enumerate
        ipcon.enumerate()

        # Give the enumerate callback time to fill ``tfIDs``.
        sleep(2)

        if DEBUG:
            print(tfIDs)

        dmxcount = 0
        for tf in tfIDs:
            # if the device UID is at most 3 characters it is a bricklet
            if len(tf[0]) > 3:
                continue
            if tf[1] in deviceIDs and VERBOSE:
                print(tf[0], tf[1], getIdentifier(tf))
            if tf[1] != 285:  # 285 == DMX Bricklet
                continue
            # Only the first DMX Bricklet is registered; the rest are
            # merely counted.
            if dmxcount == 0:
                print(
                    "Registering %s as slave DMX device for capturing DMX frames"
                    % tf[0])
                dmx = BrickletDMX(tf[0], ipcon)
                # NOTE(review): the message above says "slave" but the mode
                # set here is DMX_MODE_MASTER -- confirm which is intended.
                dmx.set_dmx_mode(dmx.DMX_MODE_MASTER)
                sleep(1)
            dmxcount += 1

    disp = dispatcher.Dispatcher()

    for h in range(512):
        disp.map("/hue%s" % h, play_record_hue, "%s" % h)

    for h in range(512):
        disp.map("/dmx%s" % h, play_record_dmx, "%s" % h)

    server = osc_server.ThreadingOSCUDPServer((args.ip, args.port), disp)
    print("Serving OSC on {}".format(server.server_address))
    signal.signal(signal.SIGINT, signal_handler)
    server.serve_forever()
Example #38
0
 def test_utf8(self):
     """The UTF-8 fixture opens with default settings; the Windows one does not.

     Opening ``self.windows_path`` without an explicit encoding must raise
     ``UnicodeDecodeError``.
     """
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(SubRipFile.open(self.utf8_path)), 1332)
     self.assertRaises(UnicodeDecodeError, SubRipFile.open,
                       self.windows_path)
Example #39
0
#!/usr/bin/python

from pysrt import SubRipFile
import sys

# Dump every subtitle of the file named by the first command-line argument:
# its text followed by the ``milliseconds`` attribute of its start time.
subs = SubRipFile.open(sys.argv[1])
for s in subs:
    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the Python 2-only print statement.
    print(s.text)
    print(s.start.milliseconds)
Example #40
0
 def test_compare_from_string_and_from_path(self):
     """Items parsed from the file and from its decoded text render identically."""
     # Read through a context manager so the fixture handle is closed
     # deterministically instead of being left to the garbage collector.
     with codecs.open(self.utf8_path, encoding='utf_8') as fixture:
         unicode_content = fixture.read()
     iterator = izip(SubRipFile.open(self.utf8_path),
                     SubRipFile.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(unicode(file_item), unicode(string_item))
Example #41
0
 def test_save(self):
     """Re-saving the Windows-1252 fixture as UTF-8 with LF matches the UTF-8 fixture byte for byte."""
     srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
     srt_file.save(self.temp_path, eol="\n", encoding="utf-8")
     try:
         # Close both handles explicitly; the original left them open.
         with open(self.temp_path, "rb") as saved, \
                 open(self.utf8_path, "rb") as expected:
             self.assertEqual(saved.read(), expected.read())
     finally:
         # Remove the temporary output even when the comparison fails.
         os.remove(self.temp_path)
Example #42
0
from numpy import array, zeros, array_equal
# import pysrt
import signal
import sys
from pysrt import SubRipFile, SubRipItem, SubRipTime

from tf_device_ids import deviceIdentifiersList

import argparse

# Module-level settings and shared mutable state.
# NOTE(review): the code that consumes these names is not part of this
# excerpt; the notes below are inferred from names and values only and
# should be confirmed against the rest of the script.
SRT_FILENAME = "output_dmx.srt"  # presumably the .srt file this script writes
AUDIO_FILENAME = "input.mp4"  # presumably the media file being processed
MAX_BRIGHTNESS = 254
TICK_TIME = 0.05 # seconds

# SubRipFile constructed with no arguments.
srtFile = SubRipFile()

# Starts empty; presumably filled with discovered device ids -- TODO confirm.
tfIDs = []

tfConnect = True

# 512-element zero array; presumably the previous DMX frame -- TODO confirm.
prevFrame = zeros(512)
prevTime = 0
subs = []
sub_incr = 1

# NOTE(review): IPConnection is not imported in the visible import lines.
ipcon = IPConnection()

# if tfConnect:
#     tfIDs = []
Example #43
0
 def setUp(self):
     """Open the ``tests/static/utf-8.srt`` fixture under ``file_path`` as ``self.file``."""
     fixture_path = os.path.join(file_path, "tests", "static", "utf-8.srt")
     self.file = SubRipFile.open(fixture_path)
Example #44
0
 def test_windows1252(self):
     """The Windows-1252 fixture yields 1332 items with CRLF line endings.

     Opening the UTF-8 fixture as ASCII must raise ``UnicodeDecodeError``.
     """
     srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(srt_file), 1332)
     self.assertEqual(srt_file.eol, "\r\n")
     self.assertRaises(UnicodeDecodeError, SubRipFile.open, self.utf8_path, encoding="ascii")
Example #45
0
def merge_video_subtitle(video_id):
    """
    Convert the Chinese and English VTT subtitles of a video to SRT and
    merge them into a single SRT subtitle file.

    The merged path is stored in ``video.subtitle_merge`` and saved.

    :param video_id: primary key of the ``Video`` whose subtitles are merged
    :return: path of the merged SRT file, or ``False`` when the Chinese or
        the English subtitle is missing
    """
    video = Video.objects.get(pk=video_id)

    # Default settings
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    # Both language tracks are required; ``and`` replaces the bitwise ``&``.
    if not (video.subtitle_cn != '' and video.subtitle_en != ''):
        return False

    # Some YouTube titles contain non-ASCII characters, or characters such
    # as "/" that cannot appear in a file name, so the title is sanitised
    # with Django's get_valid_filename().
    # Note: the result differs from the name youtube-dl's restrictfilenames
    # produces, so these names may differ from the ones stored in
    # subtitle_cn / subtitle_en. A "." in the title is kept.
    safe_title = get_valid_filename(video.title)

    # Convert the VTT subtitles to SRT. (The earlier convert_file() based
    # approach no longer worked, hence convert_subtilte_format().)
    subs_cn_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.cn.srt' % (safe_title, video.video_id))
    convert_subtilte_format(srt_file=video.subtitle_cn.path,
                            ass_file=subs_cn_srt_path)

    subs_en_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.en.srt' % (safe_title, video.video_id))
    # Keep the computed output path, as for the Chinese track. The original
    # overwrote it with the converter's return value, which only works if
    # the converter happens to return that same path.
    convert_subtilte_format(srt_file=video.subtitle_en.path,
                            ass_file=subs_en_srt_path)

    subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
    subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
    merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

    merge_subs_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.zh-Hans.en.srt' % (safe_title, video.video_id))

    merge_subs.save(merge_subs_path, encoding=encoding)

    video.subtitle_merge = merge_subs_path
    video.save(update_fields=['subtitle_merge'])
    return merge_subs_path
Example #46
0
 def test_compare_from_string_and_from_path(self):
     """Parsing the UTF-8 fixture by path and from its decoded text gives equal items."""
     # The context manager closes the fixture handle the original leaked.
     with codecs.open(self.utf8_path, encoding="utf_8") as fixture:
         unicode_content = fixture.read()
     iterator = izip(SubRipFile.open(self.utf8_path), SubRipFile.from_string(unicode_content))
     for file_item, string_item in iterator:
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(unicode(file_item), unicode(string_item))
Example #47
0
def main():
    """Match IMDB quotes against subtitle timings and write ``quotes.txt``.

    Runs inside the directory given as the first command-line argument,
    which must contain ``quotes.htm``, ``subtitles.srt`` and ``project.xml``.

    Steps:
      1. Extract the text of every ``<div class="sodatext">`` in
         ``quotes.htm`` and strip markup and bracketed stage directions.
      2. For each subtitle whose first line has at least three words and is
         contained in a (punctuation-free, lower-cased) quote, remember the
         quote under the subtitle's start timecode.
      3. Write one ``<relative position>#<quote>`` line per match to
         ``quotes.txt``; the position is the timecode divided by the movie
         length (``frames / fps`` from ``project.xml``).
    """
    os.chdir(sys.argv[1])

    # NOTE(review): raw_input exists on Python 2 only.
    raw_input("are the subtitle timings correct?".upper())

    # Compiled once; strips every run of ASCII punctuation.
    punctuation_re = re.compile("[%s]+" % re.escape(string.punctuation))

    # ##### extract quotes from IMDB html-file #####
    with open(r"quotes.htm", "r") as f:
        tree = etree.parse(f, etree.HTMLParser())
    root = tree.getroot()

    quotes = []
    for div in root.xpath("//div"):
        try:
            if div.attrib["class"] != "sodatext":
                continue
            s = etree.tostring(div)
            # Raw strings keep the patterns identical while avoiding
            # invalid string-escape sequences.
            s = re.sub(r"\<div.*\>\n", "", s)
            s = re.sub(r"\</div.*\>", "", s)

            # character names
            s = re.sub(r'\<b\>\<a.*"\>', "", s)
            s = re.sub(r"\</a\>\</b\>:\n", ": ", s)

            # share this quote
            s = re.sub(r"\<p.*\>.*\</p\>", "", s)
            s = re.sub(r"\<span.*\>.*\</span\>", "", s)

            s = re.sub(r"\[.*\]", "", s)  # stage directions
            s = re.sub(r"\<br/\>", "", s)
            s = re.sub("  ", " ", s)
            lines = [line.strip() for line in s.split("\n")]
            lines = [line for line in lines if len(line) > 0]
            if len(lines) == 1:
                lines[0] = lines[0][1:].strip()
            quotes.append("\n".join(lines))
        except Exception:
            # Best effort: skip <div>s without a class attribute or whose
            # markup cannot be cleaned. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit through.
            continue

    quotes = list(set(quotes))
    quotes_clean = [punctuation_re.sub("", x).lower().strip() for x in quotes]

    # ##### read subtitles from srt-file #####
    subs = SubRipFile.open('subtitles.srt')

    timecode_quote = {}
    for item in subs:
        item.text = punctuation_re.sub("", item.text)
        item.text = item.text.lower().strip()
        text = item.text.split("\n")[0]  # first line only

        # we'll get a lot of false hits with fewer than three words :/
        if len(text.split(" ")) < 3:
            continue
        for i, quote in enumerate(quotes_clean):
            if text in quote and quotes[i] not in timecode_quote.values():
                timecode_quote[str(item.start)] = quotes[i]

    # ##### movie length from the project file #####
    tree = et.parse("project.xml")
    movie = tree.getroot()
    fps = float(movie.attrib["fps"])
    frames = float(movie.attrib["frames"])
    seconds = frames / fps

    # sort by timecode (sorted() works on Python 2 and 3, unlike
    # keys() followed by .sort())
    timecodes = sorted(timecode_quote)

    with open("quotes.txt", "w") as f:
        for tc in timecodes:
            tc_seconds = timecode_to_seconds(tc)
            print("%.1f%% %s" % (100 * tc_seconds / seconds, tc))
            print(timecode_quote[tc])
            print("")
            f.write("%f#%s\n" % (tc_seconds / seconds,
                                 timecode_quote[tc].replace("\n", "#")))

    print("<< %d QUOTES >>" % len(timecodes))

    return
Example #48
0
 def input_file(self):
     """Return the source subtitle file, opening it on first use.

     On the first call the file named by ``self.arguments.file`` is opened
     with ``SubRipFile.ERROR_LOG`` error handling and cached on the
     instance as ``_source_file``; later calls return the cached object.
     """
     if hasattr(self, '_source_file'):
         return self._source_file
     opened = SubRipFile.open(self.arguments.file,
                              error_handling=SubRipFile.ERROR_LOG)
     self._source_file = opened
     return self._source_file
Example #49
0
 def setUp(self):
     """Store a newly constructed ``SubRipFile()`` on the test case as ``self.duck``."""
     new_file = SubRipFile()
     self.duck = new_file
Example #50
0
 def setUp(self):
     """Store a newly constructed ``SubRipFile()`` on the test case as ``self.file``."""
     new_file = SubRipFile()
     self.file = new_file
Example #51
0
 def test_default_value(self):
     """``eol`` equals ``os.linesep`` on ``self.file`` and the constructor value when one is given."""
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(self.file.eol, os.linesep)
     srt_file = SubRipFile(eol='\r\n')
     self.assertEqual(srt_file.eol, '\r\n')
Example #52
0
 def test_multiple_item(self):
     """``text`` of a two-item file is the item texts joined by a newline."""
     srt_file = SubRipFile([
         SubRipItem(1, {'seconds': 0}, {'seconds': 3}, 'Hello'),
         SubRipItem(1, {'seconds': 1}, {'seconds': 2}, 'World !')
     ])
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(srt_file.text, 'Hello\nWorld !')
Example #53
0
    def export_subtitle(source_file_path,
                        subs,
                        target_file_path,
                        frame_rate=25.0):
        """Export subtitle in the format determined by the file extension.

        The output format is chosen from the (lower-cased) extension of
        ``source_file_path``. SubRip is written directly, TTML is produced
        by re-timing the cues of the source document, and every other
        supported format is produced by saving a temporary SubRip file and
        converting it with the matching ``Utils.srt2*`` helper.

        Arguments:
            source_file_path {string} -- The path to the original subtitle file.
            subs {list} -- A list of SubRipItems.
            target_file_path {string} -- The path to the exported subtitle file.
            frame_rate {float} -- The frame rate for frame-based subtitle formats {default: 25.0}.

        Raises:
            UnsupportedFormatException -- If the source extension matches no known format.
        """

        _, file_extension = os.path.splitext(source_file_path.lower())

        if file_extension in Subtitle.SUBRIP_EXTENTIONS:
            SubRipFile(subs).save(target_file_path, encoding="utf8")
            Utils.remove_trailing_newlines(target_file_path)
            return

        if file_extension in Subtitle.TTML_EXTENSIONS:
            tree = ElementTree.parse(source_file_path)
            tt = tree.getroot()
            cues = (tt.find("tt:body", Subtitle.TT_NS).find(
                "tt:div", Subtitle.TT_NS).findall("tt:p", Subtitle.TT_NS))
            # Cues are matched to ``subs`` by position; SubRip's "," decimal
            # separator is replaced by "." in the timestamps.
            for index, cue in enumerate(cues):
                cue.attrib["begin"] = str(subs[index].start).replace(",", ".")
                cue.attrib["end"] = str(subs[index].end).replace(",", ".")

            # Change single quotes in the XML header to double quotes
            with open(target_file_path, "w", encoding="utf8") as target:
                normalised = (ElementTree.tostring(
                    tt, encoding="utf8", method="xml").decode("utf-8").replace(
                        "<?xml version='1.0' encoding='utf8'?>",
                        '<?xml version="1.0" encoding="utf8"?>',
                    ))
                target.write(normalised)
            return

        def to_microdvd(srt_path, target_path):
            # MicroDVD is frame based, so it also needs the frame rate.
            Utils.srt2microdvd(srt_path, target_path, frame_rate=frame_rate)

        # Checked in the same order as the original if/elif chain.
        converters = (
            (Subtitle.WEBVTT_EXTENSIONS, Utils.srt2vtt),
            (Subtitle.SSA_EXTENTIONS, Utils.srt2ssa),
            (Subtitle.ADVANCED_SSA_EXTENTIONS, Utils.srt2ass),
            (Subtitle.MICRODVD_EXTENSIONS, to_microdvd),
            (Subtitle.MPL2_EXTENSIONS, Utils.srt2mpl2),
            (Subtitle.TMP_EXTENSIONS, Utils.srt2tmp),
        )
        for extensions, convert in converters:
            if file_extension in extensions:
                break
        else:
            raise UnsupportedFormatException(
                "Unknown subtitle format for file: {}".format(
                    source_file_path))

        # mkstemp() returns an OPEN descriptor; close it at once so it is
        # not leaked (the file is re-opened by path below). Creating the
        # file before ``try`` keeps ``path`` bound in the ``finally``.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        try:
            SubRipFile(subs).save(path, encoding="utf8")
            convert(path, target_file_path)
        finally:
            os.remove(path)
Example #54
0
  def onInit( self ):
    """Locate the current .srt subtitle file and open or rebuild its search index.

    The subtitle is looked up next to the playing file and then under
    ``special://temp``. If it is missing or is not a ``.srt`` file, an
    error dialog is shown, the subtitle plugin is launched and the script
    exits. Otherwise the file is hashed: when the hash equals the one
    stored in ``subseek-indexdir/hash.txt`` the existing archive is opened
    read-only, else the index directory is recreated and every subtitle is
    indexed with its text and start time.
    """
    subtitle_name = xbmc.Player().getSubtitles()
    filename = os.path.join(os.path.split(xbmc.Player().getPlayingFile())[0], subtitle_name)

    if not os.path.exists(filename):
      filename = os.path.join("special://temp", subtitle_name)

    if not os.path.exists(filename):
      xbmc.log(__scriptname__ + ": cannot find subtitle file!", xbmc.LOGERROR)
      dialog = xbmcgui.Dialog()
      dialog.ok('SubSeek', 'Sorry, the subtitle file could not be found...')
      xbmc.executebuiltin('XBMC.RunPlugin(plugin://script.xbmc.subtitles/)')
      self.exit_script()
      # Nothing below can work without the file; do not fall through.
      return

    if not subtitle_name.split('.')[-1] == "srt":
      xbmc.log(__scriptname__ + ": incompatible subtitles", xbmc.LOGERROR)
      dialog = xbmcgui.Dialog()
      dialog.ok('SubSeek', 'Sorry, the subtitle file is not compatible. Please load a .srt')
      xbmc.executebuiltin('XBMC.RunPlugin(plugin://script.xbmc.subtitles/)')
      self.exit_script()
      return

    xbmc.log(__scriptname__ + ": Subtitle file: " + filename, xbmc.LOGDEBUG)

    index_dir = os.path.join("special://temp", "subseek-indexdir")
    hash_path = os.path.join(index_dir, "hash.txt")

    hashmatch = False
    pDialog = xbmcgui.DialogProgress()
    pDialog.create('SubSeek', 'Hashing subtitle file...')
    pDialog.update(0)

    # try/finally (not ``with``) keeps the block valid on old interpreters
    # while still closing the handle when hashing fails.
    m = md5py.md5()
    f = open(filename, 'r')
    try:
      for line in f:
        m.update(line)
    finally:
      f.close()
    subtitle_hash = m.hexdigest()
    xbmc.log(__scriptname__ + ": Subtitle hash is " + subtitle_hash, xbmc.LOGDEBUG)

    if os.path.exists(hash_path):
      f = open(hash_path, 'r')
      try:
        stored_hash = f.readline()
      finally:
        f.close()
      if stored_hash == subtitle_hash:
        hashmatch = True
        xbmc.log(__scriptname__ + ": Subtitle hash matches stored database, reusing archive", xbmc.LOGDEBUG)
      else:
        xbmc.log(__scriptname__ + ": Subtitle hash does not match stored database, building new database", xbmc.LOGDEBUG)

    if not hashmatch:
      # On the first run the index directory does not exist yet;
      # an unconditional rmtree() would raise here.
      if os.path.exists(index_dir):
        shutil.rmtree(index_dir)
      os.mkdir(index_dir)

      self.archive = Nucular.Nucular(index_dir)
      self.archive.create()

      pDialog = xbmcgui.DialogProgress()

      pDialog.create('SubSeek', 'Opening Subtitle File...')
      pDialog.update(0)

      subs = SubRipFile.open(filename, encoding='iso-8859-1')

      pDialog.create('SubSeek', 'Populating Database...')
      pDialog.update(0)

      total = len(subs)
      for i, sub in enumerate(subs):
        # str(datetime) is "<date> <time>"; keep only the time part as the
        # textual start position.
        start_text = str(datetime.datetime(1,1,1,
                        sub.start.hours,
                        sub.start.minutes,
                        sub.start.seconds,
                        sub.start.milliseconds*1000)).split()[1]
        D = {   "content": sub.text.replace("\n", " ").replace("<i>", "[I]").replace("</i>", "[/I]"),
                "start": start_text}
        self.archive.indexDictionary(str(uuid.uuid4()), D)
        pDialog.update(int(math.floor(100*i/total)))

      pDialog.update(100, 'Storing Database...')
      self.archive.store(lazy=False)

      # Record the hash only after the archive was stored successfully.
      f = open(hash_path, "w")
      try:
        f.write(subtitle_hash)
      finally:
        f.close()
    else:
      self.archive = Nucular.Nucular(index_dir, readOnly=True)

    pDialog.close()
Example #55
0
def makeL1L2(L1_srt, L2_srt, out_srt, levels, save_sync, out_L1_utf8bom_srt, out_L2_utf8bom_srt,
             show_L2, encoding, L1_color, L1_size, L2_color, L2_size):
    """
    Join the L1_srt and L2_srt subtitles and save one result per level.

    out_srt is a path template: "{{LEVEL}}" is replaced by each entry of
    levels to obtain that level's output path.
    If save_sync is true, the synced srt files are saved next to the inputs
    (".srt" replaced by ".synced.srt").
    If out_L1_utf8bom_srt / out_L2_utf8bom_srt is not empty, the utf8-BOM
    copy of the corresponding input is moved to that path; otherwise the
    copy is deleted.
    L1_color, L1_size, L2_color and L2_size are passed to setSrtTemplates
    to format the subs.
    """

    def finish_bom_copy(bom_path, keep_as):
        # Move the utf8-BOM copy to keep_as (replacing an existing file) or
        # drop it when no destination was requested.
        if not keep_as:
            os.remove(bom_path)
            return
        if os.path.isfile(keep_as):
            os.remove(keep_as)
        os.rename(bom_path, keep_as)

    log("L1_srt: " + L1_srt)
    log("L2_srt: " + L2_srt)
    log("show_L2: " + show_L2)
    log("encoding: " + encoding)
    log("save_sync: ", save_sync)
    log("levels: ", levels)
    log("L1 color: {}, size: {}.".format(L1_color, L1_size))
    log("L2 color: {}, size: {}.".format(L2_color, L2_size))
    log("out_L1_utf8bom_srt: ", out_L1_utf8bom_srt)
    log("out_L2_utf8bom_srt: ", out_L2_utf8bom_srt)

    setSrtTemplates(L1_color, L1_size, L2_color, L2_size)

    # try to decode and save as utf8-bom
    bom_L1 = L1_srt + ".utf8bom"
    bom_L2 = L2_srt + ".utf8bom"
    makeFileUtf8Bom(L1_srt, bom_L1)
    makeFileUtf8Bom(L2_srt, bom_L2)

    original_L1 = SubRipFile.open(bom_L1)
    original_L2 = SubRipFile.open(bom_L2)

    subs_L1, dupes, fixed, subs_L2 = syncSrts(original_L1, original_L2)

    if save_sync:
        synced_L1_path = L1_srt.replace(".srt", ".synced.srt")
        synced_L2_path = L2_srt.replace(".srt", ".synced.srt")

        subs_L1.save(synced_L1_path, encoding=encoding)
        subs_L2.save(synced_L2_path, encoding=encoding)
        log("Saved {} and {}. Duplicate lines: {} Fixed: {}".format(
            synced_L1_path, synced_L2_path, dupes, fixed))

    # Per-level bookkeeping: output file, hidden-line counter, output path.
    outs = {}
    removed_lines = {}
    out_srts = {}
    for level in levels:
        out_srts[level] = out_srt.replace("{{LEVEL}}", level)
        outs[level] = SubRipFile()
        removed_lines[level] = 0

    # Subtitles are paired by position; subs_L2 drives the iteration.
    for position in range(len(subs_L2)):
        processSub(subs_L1[position], subs_L2[position], levels, outs,
                   removed_lines, show_L2)

    for level in levels:
        if level != "0":
            criteria = level_criterias[level]
        else:
            criteria = 'none'
        summary = "level_criteria: {}. Hidden L1 lines: {} out of {}".format(
            criteria, removed_lines[level], len(subs_L2))
        # The summary is appended as an extra 1 ms subtitle at time 0.
        level_out = outs[level]
        level_out.append(
            SubRipItem(1, {'milliseconds': 0}, {'milliseconds': 1}, summary))
        level_out.clean_indexes()
        level_out.save(path=out_srts[level], encoding=encoding)
        log("Saved {}. {} ".format(out_srts[level], summary))

    finish_bom_copy(bom_L1, out_L1_utf8bom_srt)
    finish_bom_copy(bom_L2, out_L2_utf8bom_srt)
Example #56
0
def merge_video_subtitle(video_id):
    """
    Convert the Chinese and English VTT subtitles of a video to SRT and
    merge them into a single SRT subtitle file.

    The merged path is stored in ``video.subtitle_merge`` and saved.

    :param video_id: primary key of the ``Video`` whose subtitles are merged
    :return: path of the merged SRT file, or ``False`` when the Chinese or
        the English subtitle is missing
    """
    video = Video.objects.get(pk=video_id)

    # Default settings
    delta = SubRipTime(milliseconds=500)
    encoding = "utf_8"

    # Both language tracks are required; ``and`` replaces the bitwise ``&``.
    if not (video.subtitle_cn != '' and video.subtitle_en != ''):
        return False

    # Some YouTube titles contain non-ASCII characters, or characters such
    # as "/" that cannot appear in a file name, so the title is sanitised
    # with Django's get_valid_filename().
    # Note: the result differs from the name youtube-dl's restrictfilenames
    # produces, so these names may differ from the ones stored in
    # subtitle_cn / subtitle_en. A "." in the title is kept.
    safe_title = get_valid_filename(video.title)

    # Convert the VTT subtitles to SRT. (The earlier convert_file() based
    # approach no longer worked, hence convert_subtilte_format().)
    subs_cn_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.cn.srt' % (safe_title, video.video_id))
    convert_subtilte_format(srt_file=video.subtitle_cn.path,
                            ass_file=subs_cn_srt_path)

    subs_en_srt_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.en.srt' % (safe_title, video.video_id))
    # Keep the computed output path, as for the Chinese track. The original
    # overwrote it with the converter's return value, which only works if
    # the converter happens to return that same path.
    convert_subtilte_format(srt_file=video.subtitle_en.path,
                            ass_file=subs_en_srt_path)

    subs_cn_srt = SubRipFile.open(subs_cn_srt_path, encoding=encoding)
    subs_en_srt = SubRipFile.open(subs_en_srt_path, encoding=encoding)
    merge_subs = merge_subtitle(subs_cn_srt, subs_en_srt, delta)

    merge_subs_path = os.path.join(
        YOUTUBE_DOWNLOAD_DIR,
        '%s-%s.zh-Hans.en.srt' % (safe_title, video.video_id))

    merge_subs.save(merge_subs_path, encoding=encoding)

    video.subtitle_merge = merge_subs_path
    video.save(update_fields=['subtitle_merge'])
    return merge_subs_path
Example #57
0
def handle_tracks(tracks, start, end, fps, srt_filename):
    """Render lighting tracks into an SRT file of HUE and DMX commands.

    Every track is passed to ``handle_track_list``. The resulting
    ``track_list`` is read as parallel sequences: ``[1]`` holds each track's
    type, ``[2]`` its address and ``[3]`` its per-frame payloads. For each
    frame the non-audio payloads are turned into a ``HUE<n>(h,s,v,transition)``
    command list and/or written into a 512-slot DMX frame, and the commands
    are appended to the subtitle file as ``SubRipItem`` entries.

    Args:
        tracks: Iterable of tracks handed to ``handle_track_list``.
        start: Forwarded unchanged to ``handle_track_list``.
        end: Forwarded unchanged to ``handle_track_list``.
        fps: Frames per second; frame ``i`` is timestamped at ``i / fps``
            seconds and HUE commands are only emitted when the frame number
            is a multiple of ``fps``.
        srt_filename: Path the resulting SRT file is saved to (UTF-8).
    """
    track_list = []
    for track in tracks:
        # NOTE(review): every iteration replaces track_list, so only the value
        # returned for the last track is used below — confirm this is intended.
        track_list = handle_track_list(track, start, end, fps)

    dmx_frame = zeros(512)
    prev_dmx_frame = zeros(512)

    subrip_file = SubRipFile(path=srt_filename)

    print(40 * "-")
    print("Processing frames")
    print(40 * "-")

    if len(track_list[1]) > 0:
        # Only process frames if there is something besides a lone audio track.
        if (len(track_list[1]) != 1 or track_list[1][0] != "audio"):
            print("Number of lighting events: ", len(track_list[3][0]))
            for i in range(len(track_list[3][0])):
                frame_no = i
                t = i * (1.0 / float(fps))
                if VERBOSE:
                    print(40 * "-")
                    print("Frame %s / time %s seconds" % (frame_no, t))
                    print(40 * "-")
                hue_cmds = []
                for j in range(len(track_list[0])):
                    if track_list[1][j] != "audio":
                        kind = track_list[1][j]
                        addr = track_list[2][j]
                        try:
                            payload = track_list[3][j][i]
                        except Exception:
                            print(
                                'ERROR: could not get payload, len(of track_list[0]) is likely greater than \
                            len (track_list[3])')
                            # Treat a missing payload as empty so the track is
                            # skipped for this frame instead of reusing the
                            # previous track's payload (or hitting an unbound
                            # name on the very first track).
                            payload = ""
                        if payload != "":
                            target = addr[1:4].lower()
                            # Convert Hue payload to hue command
                            if target == "hue" and kind == "OSCColor/floatarray":
                                if VERBOSE:
                                    print("hue", addr, payload)
                                r, g, b = 0, 0, 0
                                try:
                                    payload_list = payload.split(",")
                                    # A fourth (alpha) component is accepted
                                    # but not used.
                                    if len(payload_list) in (3, 4):
                                        r, g, b = payload_list[:3]
                                except Exception as e:
                                    print(e)

                                h, s, v = rgb_to_hsv(float(r), float(g),
                                                     float(b))
                                # Scale the 0..1 HSV components to the integer
                                # ranges used in the HUE command.
                                h = int(h * 65535.0)
                                s = int(s * 254.0)
                                v = int(v * 254.0)
                                n = int(addr[4:])
                                hue_cmds.append(
                                    "HUE%s(%s,%s,%s,%s)" %
                                    (n, h, s, v, TRANSITION_TIME))
                            # Convert single DMX channel to command
                            elif target == "dmx" and kind == "OSCValue/float":
                                if VERBOSE:
                                    print("dmx value", addr, payload)
                                n = int(addr[4:])
                                dmx_frame[n] = int(float(payload) * 254)
                            # Convert multiple DMX channels to command
                            elif target == "dmx" and kind in (
                                    "OSCColor/floatarray",
                                    "OSCValue/standard"):
                                if VERBOSE:
                                    print("dmx colour", addr, payload)
                                # Consecutive values fill consecutive channels
                                # starting at the addressed one.
                                n = int(addr[4:])
                                for channel in payload.split(","):
                                    dmx_frame[n] = int(float(channel) * 254)
                                    n += 1

                hue_cmd = ";".join(hue_cmds)

                # Output HUE commands
                if frame_no % fps == 0 and hue_cmd != "":
                    item = SubRipItem(frame_no, text=hue_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1)
                    if VERBOSE:
                        print(item)
                    else:
                        print("h", end="")
                        stdout.flush()
                    subrip_file.append(item)
                    # Bumping frame_no here also affects the index and the
                    # modulo check of the DMX item emitted for this frame.
                    frame_no += 1

                # Output DMX command
                dmx_frame_trimmed = trim_zeros(dmx_frame, 'b').astype('uint8')

                # tolist() yields plain Python ints, so the text is "(1,2,3)"
                # regardless of how the installed NumPy prints its scalars.
                # Slot 0 is dropped by the [1:] slice.
                dmx_cmd = "DMX1" + str(
                    tuple(dmx_frame_trimmed.tolist())[1:]).replace(" ", "")

                if VERBOSE:
                    print('dmx_cmd to be written: ', dmx_cmd)

                if (not array_equal(dmx_frame_trimmed,
                                    prev_dmx_frame)) or (frame_no % fps == 0):
                    # Fix for an empty DMX command, usually found at the start
                    # of a treatment track: send 511 explicit zeros instead.
                    if dmx_cmd == "DMX1()":
                        dmx_cmd = "DMX1(" + ",".join(["0"] * 511) + ")"

                    item = SubRipItem(frame_no, text=dmx_cmd)
                    item.shift(seconds=t)
                    item.end.shift(seconds=1.0 / fps)

                    if VERBOSE:
                        print(item)
                    else:
                        print("d", end="")
                        stdout.flush()

                    subrip_file.append(item)
                    frame_no += 1
                prev_dmx_frame = dmx_frame_trimmed
                if VERBOSE:
                    print(40 * "-")
    encoding = "utf_8"
    subrip_file.save(srt_filename, encoding=encoding)
    print()
Example #58
0
 def test_shift(self):
     """Check that shifting a file applies both an offset and a ratio to its items."""
     srt_file = SubRipFile([SubRipItem()])
     srt_file.shift(1, 1, 1, 1)
     # assertEqual replaces the assertEquals alias, which was removed in
     # Python 3.12.
     self.assertEqual(srt_file[0].end, (1, 1, 1, 1))
     srt_file.shift(ratio=2)
     self.assertEqual(srt_file[0].end, (2, 2, 2, 2))
Example #59
0
    def __save_subtitle_by_extension(file_extension: str,
                                     subs: List[SubRipItem],
                                     source_file_path: str,
                                     target_file_path: str,
                                     encoding: str,
                                     frame_rate: Optional[float],
                                     is_exporting: bool = False):
        """Save subtitle cues to a file in the format chosen by extension.

        SubRip and STL targets are written directly as SubRip. A TTML export
        re-reads ``source_file_path`` and only rewrites the ``begin``/``end``
        attributes of its cues. Every other supported format is produced by
        saving a temporary SubRip file and running the matching ``Utils.srt2*``
        converter on it.

        Args:
            file_extension: Extension used to select the output format.
            subs: Cues to save.
            source_file_path: Original subtitle file; parsed for TTML export
                and named in the error for unknown formats.
            target_file_path: Path of the file to write.
            encoding: Text encoding used for the written files.
            frame_rate: Frame rate forwarded to the MicroDVD converter.
            is_exporting: For TTML, patch the timings of the source document
                instead of converting from SubRip.

        Raises:
            UnsupportedFormatException: If the extension matches no known
                subtitle format.
        """

        def convert_via_temp_srt(convert):
            # Save the cues to a throwaway SubRip file and convert it to the target.
            # mkstemp hands back an open descriptor; close it immediately so
            # it is not leaked. Creating the file before the try also keeps
            # the cleanup from touching an unbound path if creation fails.
            fd, path = tempfile.mkstemp()
            os.close(fd)
            try:
                SubRipFile(subs).save(path, encoding=encoding)
                convert(path, target_file_path)
            finally:
                os.remove(path)

        if file_extension in Subtitle.SUBRIP_EXTENTIONS:
            SubRipFile(subs).save(target_file_path, encoding=encoding)
            Utils.remove_trailing_newlines(target_file_path, encoding)
            return

        if file_extension in Subtitle.TTML_EXTENSIONS:
            if not is_exporting:
                convert_via_temp_srt(Utils.srt2ttml)
                return

            tree = ElementTree.parse(source_file_path)
            tt = tree.getroot()
            cues = (tt.find("tt:body", Subtitle.TT_NS).find(
                "tt:div",
                Subtitle.TT_NS).findall("tt:p",
                                        Subtitle.TT_NS))  # type: ignore
            for index, cue in enumerate(cues):
                # SubRip timestamps use a comma before the milliseconds.
                cue.attrib["begin"] = str(subs[index].start).replace(
                    ",", ".")
                cue.attrib["end"] = str(subs[index].end).replace(",", ".")

            # Change single quotes in the XML header to double quotes
            with open(target_file_path, "w", encoding=encoding) as target:
                if "xml_declaration" in inspect.getfullargspec(
                        ElementTree.tostring
                ).kwonlyargs:  # for >= python 3.8
                    encoded = ElementTree.tostring(tt,
                                                   encoding=encoding,
                                                   method="xml",
                                                   xml_declaration=True)
                else:
                    encoded = ElementTree.tostring(tt,
                                                   encoding=encoding,
                                                   method="xml")
                normalised = encoded.decode(encoding) \
                    .replace("<?xml version='1.0' encoding='", '<?xml version="1.0" encoding="',) \
                    .replace("'?>", '"?>')
                target.write(normalised)
            return

        # Formats produced from a temporary SubRip file, checked in order.
        temp_srt_converters = (
            (Subtitle.WEBVTT_EXTENSIONS, Utils.srt2vtt),
            (Subtitle.SSA_EXTENTIONS, Utils.srt2ssa),
            (Subtitle.ADVANCED_SSA_EXTENTIONS, Utils.srt2ass),
            (Subtitle.MICRODVD_EXTENSIONS,
             lambda src, dst: Utils.srt2microdvd(src,
                                                 dst,
                                                 frame_rate=frame_rate)),
            (Subtitle.MPL2_EXTENSIONS, Utils.srt2mpl2),
            (Subtitle.TMP_EXTENSIONS, Utils.srt2tmp),
            (Subtitle.SAMI_EXTENSIONS, Utils.srt2sami),
        )
        for extensions, convert in temp_srt_converters:
            if file_extension in extensions:
                convert_via_temp_srt(convert)
                return

        if file_extension in Subtitle.STL_EXTENSIONS:
            # STL output is saved as SubRip straight to the target; no
            # temporary file is needed.
            SubRipFile(subs).save(target_file_path, encoding=encoding)
            return

        raise UnsupportedFormatException(
            "Unknown subtitle format for file: {}".format(
                source_file_path))