# Example 1 (originally "Ejemplo n.º 1", score: 0)
def download_caption(vidmeta, folder):
	"""Download the caption track listed in vidmeta['select_map'] and save
	it as an SRT file inside `folder`.

	vidmeta: dict with keys 'title', 'vid' and 'select_map' (a list of
	         stream-map dicts carrying at least 'media' and 'url').
	folder:  destination directory; a trailing slash is optional.

	Only the first entry whose media type is "caption" is fetched.
	"""
	uid = vidmeta['vid']
	# Bug fix: the logger was keyed on the undefined name `vid`;
	# use the video id from vidmeta instead.
	logr = logging.getLogger(uid)

	title = clean_up_title(vidmeta['title'])
	select_map = vidmeta['select_map']
	path = folder.rstrip('/')+"/"+str(title)+"_-_"+str(uid)+"."+"srt"

	for smap in select_map:
		if smap['media'] != "caption":
			continue
		capDom = minidom.parse(
			urllib.urlopen(smap['url'])
		)
		texts = capDom.getElementsByTagName('text')
		hp = HTMLParser()
		f = open(path, 'w')
		try:
			# SRT cue counters are 1-based, so enumerate from 1
			# (the original wrote 0-based indices).
			for i, text in enumerate(texts, 1):
				fstart = float(text.getAttribute('start'))
				start = convert_time_format(fstart)
				fdur = float(text.getAttribute('dur'))
				end = convert_time_format(fstart + fdur)
				t = text.childNodes[0].data
				f.write('%d\n' % i)
				f.write('%s --> %s\n' % (start, end))
				# Un-escape HTML entities and encode for the local filesystem.
				f.write(hp.unescape(t).encode(sys.getfilesystemencoding()))
				f.write('\n\n')
		finally:
			# Bug fix: the handle was never closed; release it even if a
			# malformed cue raises mid-loop.
			f.close()
		logr.info("\t%s\n\tSaved in: => %s", smap_to_str(smap), path)
		break
# Example 2 (originally "Ejemplo n.º 2", score: 0)
def extract_playlist(plid, pl_page):
	"""Build a playlist dict from a fetched playlist HTML page.

	plid:    playlist id string, stored verbatim in the result.
	pl_page: dict whose 'contents' key holds the raw playlist HTML.

	Returns a dict with keys 'plid', 'title', 'owner' and 'list' (the
	video items, enriched by load_meta_info / load_meta_info_parallel).
	"""
	tree = html.fromstring(pl_page['contents'])

	t = tree.xpath('//h1[@class="pl-header-title"]/text()')
	title = clean_up_title(t[0]) if t else "unknown title"
	owner = tree.xpath('//h1[@class="branded-page-header-title"]/span/span/span/a/text()')[0]
	playlist = { 'plid': plid, 'title': title, 'owner': owner }

	plist = parse_playlist(tree)
	lmurl = parse_lmwidget(tree)

	# Keep following the "load more" widget until no further URL is
	# returned; each AJAX response carries another chunk of the list.
	# (Removed an unused `count` counter from the original loop.)
	while len(lmurl) > 0:
		ajax_resp = load_more_ajax(lmurl)
		if ajax_resp['error'] < 0:
			print "Error extracting load more... returning the list"
			break
		pl = parse_playlist(ajax_resp['list_content'], len(plist))
		plist.extend(pl)
		lmurl = parse_lmwidget(ajax_resp['lm_widget'])

	playlist['list'] = plist
	prune_playlist(playlist)
	print_playlist_header(playlist)

	# NOTE(review): `load_sequential` is read as a module-level flag --
	# confirm it is defined elsewhere in this file.
	if load_sequential:
		playlist['list'] = load_meta_info(plist)
	else:
		playlist['list'] = load_meta_info_parallel(plist)

	return playlist
# Example 3 (originally "Ejemplo n.º 3", score: 0)
def parse_playlist(list_content, last=0):
	"""Extract video items from a playlist table in an lxml tree.

	list_content: lxml element/tree containing a #pl-video-table.
	last:         index offset for the first item (used when appending
	              "load more" chunks to an existing list).

	Returns a list of dicts with keys 'index', 'vid', 'title', 'duration'.
	"""
	plist = list()
	tstr = '//table[@id="pl-video-table"]/tbody/tr'
	i = last
	# Query each row relatively instead of re-running an absolute,
	# positionally-indexed XPath from the root for every row (the
	# original was O(n^2) in the number of rows and never used the
	# loop variable).
	for row in list_content.xpath(tstr):
		vid = row.xpath('./@data-video-id')[0]
		title = clean_up_title(row.xpath('./@data-title')[0])
		t = row.xpath("./td[@class='pl-video-time']/div/div[@class='timestamp']/span/text()")
		time = t[0] if t else "00:00"  # rows without a timestamp default to 00:00
		i += 1
		plist.append({'index': i, 'vid': vid, 'title': title, 'duration': str(time)})

	return plist
# Example 4 (originally "Ejemplo n.º 4", score: 0)
def download_streams(vidmeta, folder):
	"""Download the audio/video streams listed in vidmeta['select_map'].

	vidmeta: dict with keys 'title', 'vid' and 'select_map' (stream-map
	         dicts with 'media', 'url', 'fmt').
	folder:  destination directory; a trailing slash is optional.

	Caption entries are skipped (see download_caption).  When audio and
	video arrive as separate streams they are fetched into temp files and
	merged with combine_streams; a combined "audio-video" stream is saved
	directly to the final path.
	"""
	uid = vidmeta['vid']
	# Bug fix: the logger was keyed on the undefined name `vid`;
	# use the video id from vidmeta instead.
	logr = logging.getLogger(uid)

	title = clean_up_title(vidmeta['title'])
	select_map = vidmeta['select_map']
	out_fmt = "mp4"

	# Assume separated content by default; if a muxed stream shows up
	# there is nothing to merge.
	separated = 1
	temp_files = dict()
	for smap in select_map:
		url = smap['url']
		media = smap['media']
		if media == "caption":
			continue
		elif media == "audio-video":
			# Muxed stream: write straight to the final output path.
			outfile = filename = folder.rstrip('/')+"/"+str(title)+"_-_"+str(uid)+"."+str(smap['fmt'])
			separated = 0
		else:
			# Separate audio or video stream: stage into a temp file
			# keyed by media type for the merge step below.
			filename = folder.rstrip('/')+"/"+str(uid)+"."+str(smap['media'])+"."+str(smap['fmt'])
			temp_files[media] = filename

		logr.info("\t%s", smap_to_str(smap))
		logr.debug("\tSaving URL: %s\n\tto %s", smap['url'], filename)
		t0 = datetime.datetime.now()
		socket.setdefaulttimeout(120)  # guard against a stalled connection
		fname, msg = urllib.urlretrieve(url, filename, reporthook=dlProgress)
		t1 = datetime.datetime.now()
		sys.stdout.write("\r")  # clear the dlProgress progress line
		sys.stdout.flush()
		logr.debug("%sTime taken %s\n---------------------------------", msg, str(t1-t0))

	if separated == 1:
		outfile = folder.rstrip('/')+"/"+str(title)+"_-_"+str(uid)+"."+out_fmt
		combine_streams(temp_files, outfile, 1)

	logr.info("\t[Outfile] '%s'", outfile)