def download_caption(vidmeta, folder):
    """Download the first caption track of a video and save it as an .srt file.

    Reads ``vidmeta['title']``, ``vidmeta['vid']`` and ``vidmeta['select_map']``.
    The first entry of ``select_map`` whose ``'media'`` is ``"caption"`` is
    fetched from its ``'url'``, parsed as XML, and every ``<text>`` element is
    written as one cue to ``<folder>/<title>_-_<vid>.srt``. Nothing is written
    when ``select_map`` has no caption entry.

    Each cue's end time is the ``start`` attribute plus the ``dur`` attribute;
    both are formatted with ``convert_time_format``.
    """
    # NOTE(review): `vid` is not defined inside this function; presumably it
    # is a module-level name -- confirm (vidmeta['vid'] may have been intended).
    logr = logging.getLogger(vid)
    title = clean_up_title(vidmeta['title'])
    uid = vidmeta['vid']
    select_map = vidmeta['select_map']
    path = folder.rstrip('/') + "/" + str(title) + "_-_" + str(uid) + "." + "srt"

    for smap in select_map:
        if smap['media'] != "caption":
            continue

        # Close the HTTP response as soon as the document has been parsed.
        response = urllib.urlopen(smap['url'])
        try:
            cap_dom = minidom.parse(response)
        finally:
            response.close()

        texts = cap_dom.getElementsByTagName('text')
        hp = HTMLParser()
        encoding = sys.getfilesystemencoding()

        # `with` guarantees the file is flushed and closed, even if a cue
        # fails to convert part-way through.
        with open(path, 'w') as f:
            # NOTE(review): cue numbers are 0-based here, as in the original;
            # SRT cue counters conventionally start at 1 -- confirm.
            for i, text in enumerate(texts):
                fstart = float(text.getAttribute('start'))
                fdur = float(text.getAttribute('dur'))
                start = convert_time_format(fstart)
                end = convert_time_format(fstart + fdur)
                # An empty <text/> element has no child node; emit an empty
                # cue instead of raising IndexError.
                nodes = text.childNodes
                caption = nodes[0].data if nodes else u""
                f.write('%d\n' % (i))
                f.write('%s --> %s\n' % (start, end))
                f.write(hp.unescape(caption).encode(encoding))
                f.write('\n\n')

        logr.info("\t%s\n\tSaved in: => %s", smap_to_str(smap), path)
        # Only the first caption entry is processed.
        break
def download_streams(vidmeta, folder):
    """Download every non-caption stream listed in ``vidmeta['select_map']``.

    An ``"audio-video"`` entry is saved directly as
    ``<folder>/<title>_-_<vid>.<fmt>`` and marks the content as already
    combined. Any other media kind is saved as ``<folder>/<vid>.<media>.<fmt>``
    and remembered by media kind. If no ``"audio-video"`` entry was seen, the
    remembered files are handed to ``combine_streams`` to produce
    ``<folder>/<title>_-_<vid>.mp4``.
    """
    # NOTE(review): `vid` is not defined inside this function; presumably it
    # is a module-level name -- confirm.
    logr = logging.getLogger(vid)
    title = clean_up_title(vidmeta['title'])
    uid = vidmeta['vid']
    select_map = vidmeta['select_map']
    out_fmt = "mp4"
    base = folder.rstrip('/') + "/"

    # Assume separated content by default. If not, there is no need to merge.
    needs_merge = 1
    part_files = {}

    for smap in select_map:
        url = smap['url']
        media = smap['media']
        if media == "caption":
            continue

        if media == "audio-video":
            filename = base + str(title) + "_-_" + str(uid) + "." + str(smap['fmt'])
            outfile = filename
            needs_merge = 0
        else:
            filename = base + str(uid) + "." + str(smap['media']) + "." + str(smap['fmt'])
            part_files[media] = filename

        logr.info("\t%s", smap_to_str(smap))
        logr.debug("\tSaving URL: %s\n\tto %s", smap['url'], filename)

        started = datetime.datetime.now()
        socket.setdefaulttimeout(120)
        _unused, msg = urllib.urlretrieve(url, filename, reporthook=dlProgress)
        finished = datetime.datetime.now()

        # Return the cursor to the start of the line after the progress output.
        sys.stdout.write("\r")
        sys.stdout.flush()
        logr.debug("%sTime taken %s\n---------------------------------", msg, str(finished - started))

    if needs_merge:
        outfile = base + str(title) + "_-_" + str(uid) + "." + out_fmt
        combine_streams(part_files, outfile, 1)

    logr.info("\t[Outfile] '%s'", outfile)