import datetime
import json

from lxml import html


def download_video(vid, folder):
    logr = setup_vid_logger(vid)
    try:
        vidmeta = load_video_meta(vid)
    except ytd_exception_meta as e:
        if e.errtype == "PAGE_FETCH_ERR":
            logr.critical("\t%s: %s", e.errmsg, e.msgstr)
        elif e.errtype == "YOUTUBE_ERROR":
            logr.critical(e.errmsg)
            logr.info("-" * 45 + "\n" + e.msgstr + "\n" + "-" * 45)
        elif e.errtype == "BAD_PAGE":
            logr.critical("\t" + e.errmsg)
            print_pretty(logr, "Parsing failed: vid_meta " + "=" * 20, e.vidmeta)
        elif e.errtype == "NO_STREAMS":
            logr.info("\tTitle:'%s'\n\tAuthor:'%s'", e.vidmeta['title'], e.vidmeta['author'])
            logr.critical("\t" + e.errmsg)
            print_pretty(logr, "Parsing failed: vid_meta " + "=" * 20, e.vidmeta)
        if deep_debug:
            write_to_file(vid + ".html", e.page['contents'])
        return

    print_pretty(logr, "Parsing successful: vid_meta " + "=" * 20, vidmeta)

    # stream_map and select_map are kept as public elements of vidmeta so
    # that they can be logged and printed outside this function.
    smap = vidmeta['stream_map']
    sm = smap['std'] + smap['adp_v'] + smap['adp_a'] + smap['caption']
    logr.debug("= Available Streams: " + "=" * 25 + "\n" + "\n".join(map(smap_to_str, sm)))
    vidmeta['select_map'] = sl = select_best_stream(smap)
    logr.debug("= Selected Streams: " + "=" * 25 + "\n" + "\n".join(map(smap_to_str, sl)) + "\n")

    logr.info("\tTitle:'%s'\n\tAuthor:'%s'", vidmeta['title'], vidmeta['author'])
    download_streams(vidmeta, folder)
    download_caption(vidmeta, folder)
    logr.info("\tFetch Complete @ %s ----------------", str(datetime.datetime.now()))
    return
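# --- Illustrative helper (an assumption, not the module's real fetcher) ----
# parse_watch_page() below consumes a dict holding the raw watch-page HTML
# under the "contents" key (see wpage["contents"] and e.page['contents']
# above). This is a minimal sketch of one way such a dict could be built,
# using the `requests` library; the fetch_watch_page name and the "url" key
# are hypothetical placeholders.
import requests


def fetch_watch_page(vid):
    # Build the watch URL and return the page in the shape parse_watch_page()
    # expects: {"url": ..., "contents": ...}.
    url = "https://www.youtube.com/watch?v=" + vid
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return {"url": url, "contents": resp.text}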
def parse_watch_page(wpage):
    page = wpage["contents"]
    arg_keys = {
        "length_seconds", "loudness", "timestamp", "host_language",
        "avg_rating", "view_count", "thumbnail_url", "fmt_list",
        "adaptive_fmts", "url_encoded_fmt_stream_map",
        "caption_tracks", "caption_translation_languages",
    }
    prop_keys = {
        "og:title": "title",
        "og:description": "description",
        "og:type": "type",
        "og:url": "url",
        "og:image": "fullimage_url",
        "og:video:url": "embed_url",
    }
    iprop_keys = {
        "videoId": "vid",
        "channelId": "chid",
        "datePublished": "datePublished",
        "genre": "genre",
        "regionsAllowed": "regionsAllowed",
        "isFamilyFriendly": "isFamilyFriendly",
        "paid": "paid",
    }

    vid_meta = dict()

    # Parse the HTML page into a DOM tree.
    tree = html.fromstring(page)

    # Extract the player script; if it is missing, surface YouTube's own
    # "player unavailable" message.
    script = tree.xpath('//script[contains(.,"ytplayer")]/text()')
    player_script = extract_player_args(script)
    if player_script == "":
        plerror = " ".join(map(str.strip, tree.xpath('//div[@id="player-unavailable"]//text()')))
        raise ytd_exception_meta("YOUTUBE_ERROR", wpage, vid_meta, plerror)

    # Extract the player args from the player script.
    arg_list = json.loads(player_script)
    args = arg_list["args"] if "args" in arg_list else None

    # Populate the attributes.
    vid_meta["author"] = " ".join(map(str.strip, tree.xpath("//div[@class='yt-user-info']//text()"))).strip()
    vid_meta["author_url"] = default_hurl + tree.xpath("//div[@class='yt-user-info']/a/@href")[0]
    vid_meta["keywords"] = tree.xpath("//meta[@name='keywords']/@content")[0].split(",")
    for k in prop_keys:
        v = tree.xpath("//meta[@property='" + k + "']/@content")
        vid_meta[prop_keys[k]] = v[0] if len(v) > 0 else ""
    for k in iprop_keys:
        v = tree.xpath("//meta[@itemprop='" + k + "']/@content")
        vid_meta[iprop_keys[k]] = v[0] if len(v) > 0 else ""

    if args is not None:
        vid_meta["player_args"] = True  # we don't quite need this, but still!
        for k in arg_keys:
            vid_meta[k] = args[k] if k in args else ""
        vid_meta["country"] = args["cr"] if "cr" in args else ""
        vid_meta["has_caption"] = vid_meta["caption_tracks"] != ""
        # fmt_list entries look like "22/1280x720"; the first entry carries
        # the highest resolution.
        f = vid_meta["fmt_list"].split(",")
        vid_meta["max_res"] = f[0].split("/")[1] if f[0] != "" else 0
        vid_meta["filesize"] = 0  # unknown at this point
    else:
        vid_meta["player_args"] = False
        vid_meta["max_res"] = 0
        vid_meta["has_caption"] = False
        # Dump the player script for post-mortem debugging when no args
        # were found.
        write_to_file(vid_meta["vid"] + ".html", player_script)

    return vid_meta
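# --- Illustrative usage sketch (hypothetical, not the project's real CLI) --
# A minimal driver for download_video(): the first argument is the
# destination folder, the remaining arguments are video ids, e.g.
#   python ytd.py ./downloads VIDEO_ID1 VIDEO_ID2
# The script name and argument layout are assumptions for illustration.
if __name__ == "__main__":
    import sys
    dest = sys.argv[1] if len(sys.argv) > 1 else "."
    for video_id in sys.argv[2:]:
        download_video(video_id, dest)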