Example #1
0
File: iatv.py Project: mtpain/iatv
def _srt_gen_from_url(base_url, end_time=3660, verbose=True):
    """Yield SRT text for consecutive caption windows fetched from *base_url*.

    Each request passes a ``t`` query parameter of the form ``"<t0>/<t1>"``.
    The first window is ``0/60``; each following window starts one past the
    previous end and ends 60 later (``61/120``, ``121/180``, ...). Fetching
    stops once the next window's end would exceed *end_time*; at least one
    window is always fetched. (Units are presumably seconds -- confirm
    against the caption service.)

    Caption timestamps in every window are shifted by the end time of the
    last caption of the previous window, so the yielded chunks form one
    continuous timeline.

    Args:
        base_url: URL of the caption endpoint.
        end_time: Upper bound for the end of the last window requested.
        verbose: When true, print the URL of each window before fetching.

    Yields:
        The re-serialised SRT text of each window with every blank-line
        separator ``'\\n\\n'`` replaced by ``' \\n\\n'``, or ``''`` when the
        response body is empty.

    Raises:
        requests.HTTPError: If any window request returns an error status.
    """
    dt = 60
    t0 = 0
    t1 = t0 + dt

    first = True
    last_end = 0.0
    has_next = True

    while has_next:
        window = '{}/{}'.format(t0, t1)

        if verbose:
            print('fetching captions from ' + base_url + '?t=' + window)

        res = requests.get(base_url, params={'t': window})
        res.raise_for_status()

        srt = res.text
        if first:
            # Only the first response has its byte-order mark removed.
            srt = srt.replace(u'\ufeff', '')
            first = False

        # Advance the window before yielding so the loop condition is
        # already up to date when the consumer resumes the generator.
        t0 = t1 + 1
        t1 = t1 + dt
        has_next = t1 <= end_time

        if not srt:
            yield ''
            continue

        cc = CaptionConverter()
        cc.read(srt, SRTReader())
        captions = cc.captions.get_captions(lang='en-US')

        # For the first window last_end is 0.0, so this shift leaves the
        # timestamps numerically unchanged.
        for caption in captions:
            caption.start += last_end
            caption.end += last_end

        # Guard against an empty caption list so last_end keeps its
        # previous value instead of raising IndexError.
        if captions:
            last_end = captions[-1].end

        yield cc.write(SRTWriter()).replace('\n\n', ' \n\n')
Example #2
0
def toSrt(file_name):
    """Convert ``<file_name>.xml`` (read as DFXP) into ``<file_name>.srt``.

    Every occurrence of the character reference ``&#x266A;`` is removed from
    the input text before it is parsed.

    Args:
        file_name: Path without extension; ``.xml`` is appended for the
            input and ``.srt`` for the output.

    NOTE(review): relies on the Python 2 ``unicode`` builtin and on the
    platform default encoding for both files -- confirm that non-ASCII
    captions round-trip correctly.
    """
    # Context managers ensure both handles are closed even when reading or
    # conversion raises.
    with open(file_name + '.xml') as transcript:
        new_transcript = transcript.read().replace('&#x266A;', '')

    converter = CaptionConverter()
    converter.read(unicode(new_transcript), DFXPReader())

    # Convert before opening the output so a failed conversion does not
    # leave a truncated .srt file behind.
    srt_text = converter.write(SRTWriter())

    with open(file_name + '.srt', 'w') as f:
        f.write(srt_text)
Example #3
0
def route_subtitles(course_id, lecture_id):
    """Download a lecture's subtitles and return them as a WebVTT response.

    The subtitle text is requested from class.coursera.org, parsed as SRT and
    re-serialised as WebVTT. When the reader raises ``CaptionReadNoCaptions``
    the response body is the empty string.

    Args:
        course_id: Course identifier interpolated into the host path.
        lecture_id: Lecture number; formatted with ``%d``.

    Returns:
        A ``Response`` with content type ``text/vtt``.
    """
    url_template = (
        'https://class.coursera.org/%s-001/lecture/subtitles?q=%d_en')
    upstream = requests.get(url_template % (course_id, lecture_id))

    try:
        caption_converter = CaptionConverter()
        caption_converter.read(upstream.text, SRTReader())
        body = caption_converter.write(WebVTTWriter())
    except CaptionReadNoCaptions:
        # No captions in the download: serve an empty document instead.
        body = ''

    return Response(body, content_type='text/vtt')
Example #4
0
File: iatv.py Project: mtpain/iatv
def _make_ts_from_srt(srt):
    """Turn SRT text into a list of transcript segments split on ``'>>'``.

    The input is padded with spaces (at the end of the string and before
    each blank-line separator), NFC-normalised, and stripped of every
    character whose Unicode category starts with ``C`` except newlines.
    It is then parsed as SRT and written out with ``TranscriptWriter``.
    In that transcript ``'>>> '`` is collapsed to ``'>>'`` and newlines
    become spaces before the text is split on ``'>>'``.

    Args:
        srt: SRT caption text.

    Returns:
        The list of strings produced by splitting the transcript on ``'>>'``.
    """
    padded = re.sub('$', ' ', srt).replace('\n\n', ' \n\n')
    normalized = unicodedata.normalize('NFC', padded)

    # Keep newlines; drop everything else in the "C" (control/other)
    # Unicode categories.
    kept_chars = [
        ch for ch in normalized
        if ch == '\n' or not unicodedata.category(ch).startswith('C')
    ]
    cleaned = ''.join(kept_chars)

    converter = CaptionConverter()
    converter.read(cleaned, SRTReader())

    transcript = converter.write(TranscriptWriter())
    transcript = transcript.replace(u'>>> ', u'>>')
    transcript = transcript.replace('\n', ' ')

    return transcript.split('>>')
Example #5
0
def dfxpToSrt(myFile, keep, verbose):
    """Convert the DFXP caption file at *myFile* to SRT under the same name.

    The original file is renamed to ``<myFile>.dfxp``, parsed as DFXP, and
    the SRT output is written to *myFile*. The renamed source is removed
    afterwards unless *keep* is true.

    Args:
        myFile: Path of the DFXP file; also the path of the SRT output.
        keep: When true, leave ``<myFile>.dfxp`` on disk.
        verbose: When true, print the rename and delete steps.
    """
    dfxp_path = "%s.dfxp" % myFile

    if verbose:
        print("--- Renaming to %s.dfxp" % myFile)
    os.rename(myFile, dfxp_path)

    # This message is printed regardless of ``verbose``.
    print("--- Converting to srt")

    # ``with`` closes the source even if reading raises.
    with open(dfxp_path) as sourceFile:
        caps = sourceFile.read()

    converter = CaptionConverter()
    converter.read(caps, DFXPReader())  # parse the DFXP source

    with open(myFile, "w") as targetFile:
        targetFile.write(converter.write(SRTWriter()))

    if not keep:
        if verbose:
            print("--- Deleting temporary file %s.dfxp" % myFile)
        os.remove(dfxp_path)
Example #6
0
def webvttToSrt(myFile, keep, verbose):
    """Convert the WebVTT caption file at *myFile* to SRT under the same name.

    The original file is renamed to ``<myFile>.webvtt``, read as UTF-8 and
    parsed as WebVTT, and the SRT output is written to *myFile* as UTF-8.
    The renamed source is removed afterwards unless *keep* is true.

    Args:
        myFile: Path of the WebVTT file; also the path of the SRT output.
        keep: When true, leave ``<myFile>.webvtt`` on disk.
        verbose: When true, print the rename and delete steps.
    """
    webvtt_path = "%s.webvtt" % myFile

    if verbose:
        print("--- Renaming to %s.webvtt" % myFile)
    os.rename(myFile, webvtt_path)

    # This message is printed regardless of ``verbose``.
    print("--- Converting to srt")

    # ``with`` closes the source even if reading or decoding raises.
    with codecs.open(webvtt_path, "r", encoding="utf8") as sourceFile:
        caps = sourceFile.read()

    converter = CaptionConverter()
    converter.read(caps, WebVTTReader())  # parse the WebVTT source

    with codecs.open(myFile, "w", encoding="utf8") as targetFile:
        targetFile.write(converter.write(SRTWriter()))

    if not keep:
        if verbose:
            print("--- Deleting temporary file %s.webvtt" % myFile)
        os.remove(webvtt_path)
Example #7
0
def from_srt(input_f, output_f):
    """Read SRT captions from *input_f* and write them as WebVTT to *output_f*.

    Both arguments are passed to ``vtt_open`` (a file or a filename).

    The raw input is decoded with the encoding reported by ``chardet`` when
    its confidence is at least 0.9, otherwise with ``cp1252``. If decoding
    fails, ``cp1252`` and then ``utf8`` are tried in turn; when all
    candidates fail the last ``UnicodeDecodeError`` propagates.

    The output is the WebVTT text encoded as UTF-8 with its final byte
    dropped (presumably a trailing newline from the writer -- confirm).

    Raises:
        UnicodeDecodeError: If no candidate encoding can decode the input.
    """
    default_subrip_encoding = 'cp1252'  # standard for SubRip files

    with vtt_open(input_f, 'r') as f:
        orig = f.read()

    detect = chardet.detect(orig)
    encoding = detect['encoding']
    # Fall back when the detector is unsure or could not name an encoding.
    if encoding is None or detect['confidence'] < 0.9:
        encoding = default_subrip_encoding

    backups = [default_subrip_encoding, 'utf8']

    while True:
        try:
            print("ENCODING: " + encoding)
            contents = orig.decode(encoding)
            break
        except UnicodeDecodeError:
            if not backups:
                raise
            encoding = backups.pop(0)

    # The caption converter seems to have a tough time with the BOM on
    # Python < 2.7.8, so drop it if present. After decoding, the BOM is the
    # single character U+FEFF, not the three raw bytes of codecs.BOM_UTF8.
    if contents.startswith(u'\ufeff'):
        contents = contents[1:]

    converter = CaptionConverter()
    converter.read(contents, SRTReader())
    contents = converter.write(WebVTTWriter())

    with vtt_open(output_f, 'w') as o:
        o.write(contents.encode('utf-8')[:-1])
def convertText(inputfile):
    """Extract the caption text of every WebVTT file in a directory.

    Each entry of the directory *inputfile* is read, decoded as UTF-8, parsed
    as WebVTT and re-serialised as SRT. SRT lines that contain a timestamp,
    contain any digit, or equal ``'-->'`` are discarded. The remaining
    distinct lines are joined with single spaces and written, UTF-8 encoded
    and followed by a newline, to
    ``/Users/lavanyasunder1/Subtitles/TXT/<entry name>.txt``.

    Distinct lines are emitted in order of first appearance, which makes the
    output deterministic (the order was previously that of a ``set``).

    Args:
        inputfile: Path of the directory holding the WebVTT files.

    NOTE(review): the digit filter also drops caption text that merely
    contains a number -- confirm this is intended.
    NOTE(review): uses ``str.decode`` on text-mode file contents, i.e. this
    is Python 2 code.
    """
    # Compiled once; the patterns are the same for every file.
    timestamp_re = re.compile('([0-9]){2}:([0-9]){2}:([0-9]){2},([0-9]){3}')
    digit_re = re.compile('([0-9])+')

    for sample_name in os.listdir(inputfile):
        with open(os.path.join(inputfile, sample_name), 'r') as f:
            read_data = f.read()
        read_data = read_data.decode("utf8")

        converter = CaptionConverter()
        converter.read(read_data, WebVTTReader())
        srt_text = converter.write(SRTWriter())

        # Only newline-terminated lines count; text after the final newline
        # is ignored, as it was by the character-by-character scan.
        lines = srt_text.split("\n")[:-1]

        words = []
        seen = set()
        for line in lines:
            if (timestamp_re.search(line) or digit_re.search(line)
                    or line == '-->'):
                continue
            if line not in seen:
                seen.add(line)
                words.append(line)

        text = " ".join(words)

        # ``with`` closes the output even if a write raises.
        with open("/Users/lavanyasunder1/Subtitles/TXT/%s.txt" % (sample_name), 'w+') as output_file:
            output_file.write(text.encode("utf-8"))
            output_file.write("\n")