Example #1
0
def download_video(vid,folder):
    """Load the metadata for ``vid``, log its streams and download them.

    The metadata comes from ``load_video_meta``. When that call raises
    ``ytd_exception_meta`` the failure is logged according to the
    exception's ``errtype`` and the function returns without downloading
    anything. Otherwise the available and the selected streams are logged
    at debug level, the selection is stored under ``'select_map'`` in the
    metadata, and ``folder`` is handed to ``download_streams`` and
    ``download_caption``.

    Always returns ``None``.
    """
    log = setup_vid_logger(vid)
    failed_banner = "Parsing failed: vid_meta "+"="*20

    try:
        meta = load_video_meta(vid)
    except ytd_exception_meta as err:
        kind = err.errtype
        if kind == "PAGE_FETCH_ERR":
            log.critical("\t{} :{}".format(err.errmsg,err.msgstr))
        elif kind == "YOUTUBE_ERROR":
            rule = "-"*45
            log.critical(err.errmsg)
            log.info(rule+"\n"+err.msgstr+"\n"+rule)
        elif kind == "BAD_PAGE":
            log.critical("\t"+err.errmsg)
            print_pretty(log,failed_banner,err.vidmeta)
        elif kind == "NO_STREAMS":
            log.info("\tTitle:'%s'\n\tAuthor:'%s'",err.vidmeta['title'],err.vidmeta['author'])
            log.critical("\t"+err.errmsg)
            print_pretty(log,failed_banner,err.vidmeta)

        # With deep debugging on, keep the fetched page for inspection,
        # whatever the error type was.
        if deep_debug:
            write_to_file(vid+".html",err.page['contents'])
        return

    print_pretty(log,"Parsing successful: vid_meta "+"="*20,meta)

    streams = meta['stream_map']
    available = streams['std'] + streams['adp_v'] + streams['adp_a'] + streams['caption']
    available_text = "\n".join(smap_to_str(entry) for entry in available)
    log.debug("= Available Streams: "+"="*25+"\n"+available_text)

    chosen = select_best_stream(streams)
    meta['select_map'] = chosen
    chosen_text = "\n".join(smap_to_str(entry) for entry in chosen)
    log.debug("= Selected Streams: "+"="*25+"\n"+chosen_text+"\n")

    # stream_map and select_map live in the metadata dict, so code outside
    # this function can log or print them as well.
    log.info("\tTitle:'%s'\n\tAuthor:'%s'",meta['title'],meta['author'])
    download_streams(meta,folder)
    download_caption(meta,folder)
    log.info("\tFetch Complete @ %s ----------------",str(datetime.datetime.now()))
Example #2
0
def parse_watch_page(wpage):
    """Build a metadata dict from a fetched watch page.

    Args:
        wpage: mapping whose ``"contents"`` entry holds the page HTML.

    Returns:
        dict with ``author``, ``author_url``, ``keywords``, the values of
        the ``og:*`` and ``itemprop`` meta tags listed below (``""`` when a
        tag is absent), and ``player_args`` / ``max_res`` / ``has_caption``.
        When the player script carries an ``"args"`` object, every key in
        ``arg_keys`` is copied as well (``""`` when missing), together with
        ``country`` and ``filesize``.

    Raises:
        ytd_exception_meta: with type ``"YOUTUBE_ERROR"`` when no player
            script can be extracted; the message is the text found under
            the ``player-unavailable`` div.
        IndexError: when the author link or the keywords meta tag is not
            present in the page (their first XPath match is used as-is).
    """
    page = wpage["contents"]
    arg_keys = {
        "length_seconds",
        "loudness",
        "timestamp",
        "host_language",
        "avg_rating",
        "view_count",
        "thumbnail_url",
        "fmt_list",
        "adaptive_fmts",
        "url_encoded_fmt_stream_map",
        "caption_tracks",
        "caption_translation_languages",
    }

    # <meta property=...> name -> key used in the result
    prop_keys = {
        "og:title": "title",
        "og:description": "description",
        "og:type": "type",
        "og:url": "url",
        "og:image": "fullimage_url",
        "og:video:url": "embed_url",
    }
    # <meta itemprop=...> name -> key used in the result
    iprop_keys = {
        "videoId": "vid",
        "channelId": "chid",
        "datePublished": "datePublished",
        "genre": "genre",
        "regionsAllowed": "regionsAllowed",
        "isFamilyFriendly": "isFamilyFriendly",
        "paid": "paid",
    }

    vid_meta = dict()
    # extract dom tree of HTML Page
    tree = html.fromstring(page)

    # extract player script
    script = tree.xpath('//script[contains(.,"ytplayer")]/text()')
    player_script = extract_player_args(script)
    if player_script == "":
        # Call .strip() on each text node instead of map(str.strip, ...):
        # the unbound str.strip rejects unicode text nodes on Python 2.
        unavailable = tree.xpath('//div[@id="player-unavailable"]//text()')
        plerror = " ".join(text.strip() for text in unavailable)
        raise ytd_exception_meta("YOUTUBE_ERROR", wpage, vid_meta, plerror)

    # extract player args from the player script
    arg_list = json.loads(player_script)
    # dict.has_key() no longer exists on Python 3; .get() works on both.
    args = arg_list.get("args")

    # populate the attributes
    author_text = tree.xpath("//div[@class='yt-user-info']//text()")
    vid_meta["author"] = " ".join(text.strip() for text in author_text).strip()
    vid_meta["author_url"] = default_hurl + tree.xpath("//div[@class='yt-user-info']/a/@href")[0]
    vid_meta["keywords"] = tree.xpath("//meta[@name='keywords']/@content")[0].split(",")

    for name, key in prop_keys.items():
        found = tree.xpath("//meta[@property='" + name + "']/@content")
        vid_meta[key] = found[0] if found else ""

    for name, key in iprop_keys.items():
        found = tree.xpath("//meta[@itemprop='" + name + "']/@content")
        vid_meta[key] = found[0] if found else ""

    if args is not None:
        vid_meta["player_args"] = True  # we don't quite need this but still!
        for key in arg_keys:
            vid_meta[key] = args.get(key, "")

        vid_meta["country"] = args.get("cr", "")
        vid_meta["has_caption"] = vid_meta["caption_tracks"] != ""

        # max_res is the second "/"-separated field of the first fmt_list
        # entry. A missing, empty or malformed fmt_list yields 0 instead of
        # raising KeyError/IndexError, matching the no-args fallback below.
        fmt_list = vid_meta["fmt_list"]
        first_fmt = fmt_list.split(",")[0] if fmt_list else ""
        fmt_fields = first_fmt.split("/")
        vid_meta["max_res"] = fmt_fields[1] if len(fmt_fields) > 1 else 0
        vid_meta["filesize"] = 0  # right now we don't know
    else:
        vid_meta["player_args"] = False
        vid_meta["max_res"] = 0
        vid_meta["has_caption"] = False

    # Dump the extracted player script next to the video id (done on every
    # successful parse, not only when debugging).
    write_to_file(vid_meta["vid"] + ".html", player_script)
    return vid_meta