def start(self):
    """Download an OK.RU video through the mobile version of its page.

    The video ident comes from ``self.find_ident()``.  The mobile page
    ``https://m.ok.ru/video/<ident>`` is fetched, the first player link is
    extracted from its markup and the stream is saved as ``<ident>.mp4``
    in the current directory.

    Returns:
        None in every case; progress and failures are reported on stdout.
    """
    ident = self.find_ident()
    if ident is None:
        print("OK.RU: Unsupported url!")
        return None

    m_url = "https://m.ok.ru/video/" + ident
    params = self.curl_get_default_params()
    # The request imitates a desktop browser opening the mobile site, e.g.:
    #   curl 'https://m.ok.ru/video/94020831980' -H 'Host: m.ok.ru' --compressed
    params["headers"]["Host"] = 'm.ok.ru'
    try:
        answ = CUrl.download(m_url, 'compressed', **params)
    except Exception:
        print("OK.RU: Can't load mobile video page! May be wrong url?")
        return None

    try:
        dt = re.findall('\<div id\=\"content\"(?:.*?)\<div id=\"mvplayer_cont\"(?:.*?)data\-video(?:.*?)href=\"(.*?)\"', answ)
        url = dt[0]
    except (IndexError, TypeError):
        # Without a link there is nothing to download; stop here instead of
        # falling through to the download step with ``url`` unbound.
        print("OK.RU: No video found!")
        return None

    extention = "mp4"
    flname = "%s.%s" % (ident, extention)
    print("OK.RU: DOWNLOADING: %s" % (url,))

    # Fresh parameters: the Host header set above must not be sent to the
    # media server.
    params = self.curl_get_default_params()
    CUrl.download(url, 'globoff', 'compressed', L=True, print_status=True, output=flname, **params)
    print("")
    print("Saved as: %s" % flname)
    return None
def get_index(self):
    """Download ``self.url`` until a user name can be read from the page.

    When the page contains the ``LoginForm_username`` marker, ``self.login()``
    is called and the page is requested again; such rounds are not counted
    as failures.  Otherwise newlines are collapsed to spaces and
    ``self.re_login`` is applied to the result.

    Returns:
        A ``(user_name, page)`` tuple: the first ``self.re_login`` match and
        the page text exactly as downloaded.

    After five downloads without a match the last page is printed and the
    process exits with status 1.
    """
    request_opts = {
        'headers': self.default_headers,
        'cookie-jar': self.cookies_jar_file,
        'cookie': self.cookies_jar_file,
    }

    failures = 0
    while True:
        page = CUrl.download(self.url, 'compressed', **request_opts)

        # The login form is present, so the session is not authorised yet.
        if re.search("LoginForm_username", page) is not None:
            self.login()
            continue

        flattened = re.sub("\n+", " ", page)
        names = self.re_login.findall(flattened)
        if names:
            return names[0], page

        failures += 1
        if failures < 5:
            continue

        print("Error on downloading url!")
        print(page)
        print("============================")
        sys.exit(1)
# Entry point for a lecture page.
#
# Steps visible in the code below:
#   1. get_index() returns the user name and the page text.
#   2. The lecture code is taken from the `loadLectureCode('<digits>', 'code');`
#      call found in the page; on failure "Wrong lecture code!" is printed and
#      the exception is re-raised.
#   3. The code is sent as form data (`id=<code>&type=code`) to
#      <self.host>/lecture/loadCode, using the default headers and the cookie
#      jar file.
#   4. The response is decoded as JSON and its "code" field is scanned for
#      `<iframe src="...">` URLs; each URL is passed to get_video() together
#      with its position in the list.
# Every failing step prints a short message and re-raises the exception.
#
# NOTE(review): the text between `print "User: "` and the lecture-code regex
# appears masked (`******`) in the reviewed copy, so the statements there could
# not be checked - confirm against the real file.
def start(self): uname, answ = self.get_index() print "User: "******"""loadLectureCode\('(\d+)', 'code'\);""", answ)[0]); except Exception as err: print "Wrong lecture code!" raise err lcode_kwargs = { 'headers' : self.default_headers, 'cookie': self.cookies_jar_file, 'data': """id=%s&type=code""" % lecture_code, } answ = CUrl.download("%s/lecture/loadCode" % self.host, 'compressed', **lcode_kwargs) try: jcode = json.loads(answ)['code'] except Exception as err: print "Can't encode JSON of handler: /lecture/loadCode" raise err try: urls = re.findall(r'<iframe src="(.*?)"', jcode) ## Get all videos from page: for id, url in enumerate(urls): self.get_video(id, url) except Exception as err: print "Can't find video config url!" raise err
def get_play_info(self, id):
    """Request the play options of a track and return them decoded.

    Args:
        id: track identifier inserted into the ``/api/play/options/`` path.

    Returns:
        The response body parsed with ``json.loads``.
    """
    endpoint = "%s/api/play/options/%s/?format=json&sqr4374_compat=1&no_404=true" % (self.host, id)

    request = self.curl_get_default_params()
    headers = request['headers']
    # Ask for JSON explicitly and declare a JSON content type.
    headers['Accept'] = "application/json, text/javascript, */*; q=0.01"
    headers['Content-Type'] = "application/json"

    return json.loads(CUrl.download(endpoint, 'compressed', **request))
def start(self):
    """Download the best-quality MAIL.RU video described by the API URL.

    ``self.find_ident()`` supplies the API URL.  The JSON document found
    there provides the title (``data["meta"]["title"]``, used for the file
    name) and the list of streams (``data["videos"]``); the stream with the
    highest numeric quality key (for example ``"720p"``) is downloaded into
    the current directory.

    Returns:
        None in every case; progress and failures are reported on stdout.
    """
    api_url = self.find_ident()
    if api_url is None:
        print("MAIL.RU: Unsupported url!")
        return None

    params = self.curl_get_default_params()
    try:
        answ = CUrl.download(api_url, 'compressed', **params)
        data = json.loads(answ)
    except Exception:
        print("MAIL.RU: Can't load video data, may be wrong url?")
        return None

    flname = "%s" % re.sub("""[\"\,\.\'\s\t\&\;\$\*]+""", "_", data["meta"]["title"])

    hq = 0
    url = None
    for v in data["videos"]:
        try:
            hq_ = int(v["key"].replace("p", ""))
        except ValueError:
            # A non-numeric quality label cannot be ranked; skip it rather
            # than abort the whole download.
            continue
        if hq_ > hq:
            hq = hq_
            url = v["url"]

    if url is None:
        # Nothing to download: stop here instead of passing None on to the
        # extension regex below.
        print("MAIL.RU: No video found!")
        return None

    # The extension is the text between "/<digits>." and the query string;
    # fall back to mp4 when the URL does not have that shape.
    found_ext = re.findall("""\/\d+\.(.*?)\?""", url)
    flext = found_ext[0] if found_ext else "mp4"
    flname += ".%s" % flext

    print("MAIL.RU: DOWNLOADING: %s" % (url,))
    CUrl.download(url, 'globoff', 'compressed', print_status=True, output=flname, **params)
    print("")
    print("Saved as: %s" % flname)
def start(self):
    """Locate the ruTube ident on the page and hand over to the embed plugin.

    Downloads ``self.url``, applies ``self.re_ident`` to the page and, when
    an ident is found, asks ``self.engine`` to find a plugin for the
    matching ``https://rutube.ru/play/embed/<ident>`` address.

    Returns:
        None, whether or not an ident was found.
    """
    request = self.curl_get_default_params()
    page = CUrl.download(self.url, 'compressed', **request)

    matches = self.re_ident.findall(page)
    if not matches:
        print("Can't find video ident!")
        return None

    video_ident = matches[0]
    print("Found ruTube ident: %s" % video_ident)
    self.engine.find_plugin("https://rutube.ru/play/embed/%s" % video_ident)
def start(self, id):
    """Resolve an embed id to the video ident and continue on the main page.

    Args:
        id: embed identifier inserted into ``<host>/play/embed/<id>``.

    Returns:
        Whatever ``MainPage(self.main).start(...)`` returns for the first
        ident matched by ``self.re_ident``, or None when the embed page
        contains no ident.
    """
    print("RUTUBE Embed...")

    embed_url = "%s/play/embed/%s" % (self.main.host, id)
    request = self.main.curl_get_default_params()
    page = CUrl.download(embed_url, 'compressed', **request)

    matches = self.re_ident.findall(page)
    if matches:
        return MainPage(self.main).start(matches[0])

    print("Can't find video ident!")
    return None
# Interactive login.
#
# Asks for the credentials on the console and sends them as form data
# (fields `LoginForm[username]` / `LoginForm[password]`, written with
# percent-encoded brackets) to self.url_login.  The same file,
# self.cookies_jar_file, is passed both as 'cookie' and as 'cookie-jar'.
# "LOGIN" is printed before the request; the response is stored in a local
# variable only, and the method returns None.
#
# NOTE(review): 'dump-header': 'testheader' presumably maps to curl's
# --dump-header and writes the response headers to a file named "testheader"
# in the working directory - confirm against the CUrl wrapper.
# NOTE(review): `login` and `passwd` are concatenated into the form body as
# they are; unless they are escaped in the masked part, characters such as
# `&`, `=` or `%` would corrupt the submitted data - verify.
# NOTE(review): parts of this statement appear masked (`******`) in the
# reviewed copy and could not be checked.
def login(self): login = raw_input("Login: "******"Password: "******"LoginForm%5Busername%5D=" + login + "&LoginForm%5Bpassword%5D=" + passwd, 'cookie': self.cookies_jar_file, 'cookie-jar': self.cookies_jar_file, 'dump-header': 'testheader', } print "LOGIN", answ = CUrl.download(self.url_login, 'compressed', **kwargs)
def get_route(self, id):
    """Download the balancer document for a track.

    The balancer URL is read from the ``"video_balancer"`` mapping of
    ``self.get_play_info(id)``.  The ``"m3u8"`` entry is preferred; when it
    is absent (or None) the first value of the mapping is used instead.

    Args:
        id: track identifier passed on to ``get_play_info``.

    Returns:
        A ``(type, body, url)`` tuple: the type tag (always ``"m3u8"``, even
        when the fallback entry was used), the downloaded balancer document
        and the balancer URL it was fetched from.
    """
    preferred = "m3u8"
    balancers = self.get_play_info(id)["video_balancer"]

    balancer_url = balancers.get(preferred)
    if balancer_url is None:
        balancer_url = list(balancers.values())[0]

    request = self.curl_get_default_params()
    request['headers']['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    body = CUrl.download(balancer_url, 'compressed', **request)
    return preferred, body, balancer_url
def get_video(self, id, url):
    """Download one embedded Vimeo player video as a single progressive file.

    The player page at *url* is fetched with ``Host: player.vimeo.com`` and
    ``Referer: self.url``.  The JSON "request" configuration embedded in the
    page script is decoded, the progressive stream with the highest
    ``<N>p`` quality is selected and saved as ``<item>_<id>.<ext>``, where
    ``<item>`` is the part of ``self.url`` after ``item/`` and ``<ext>`` is
    the extension found in the stream URL.

    Args:
        id: position of the video on the page; only used in the file name.
        url: player (iframe) URL.

    Returns:
        None.

    Raises:
        Re-raises the exception that prevented locating or decoding the
        player configuration, after printing a short message.
    """
    # Work on a private copy of the headers: Host/Referer set here (and the
    # later removal of Host) must not leak into self.default_headers, which
    # other requests reuse.
    vkwargs = {
        'headers': dict(self.default_headers),
    }
    vkwargs['headers']['Host'] = 'player.vimeo.com'
    vkwargs['headers']['Referer'] = self.url
    vdata_s = CUrl.download(url, 'compressed', **vkwargs)

    try:
        v_data = re.findall("""function\(e,a\){var t={"cdn_url".*?request":(.*?),"player_url":""", vdata_s)[0]
        v_data = json.loads(v_data)
    except Exception:
        print("Can't find video config data!")
        raise

    # Only the progressive (complete mp4 file) variant is downloaded.
    # Pick the highest quality: [best quality so far, its url].
    stream_id = [0, None]
    for stream in v_data["files"]["progressive"]:
        try:
            qa = int(re.findall("^(\d+)p", stream["quality"])[0])
        except (IndexError, KeyError, TypeError, ValueError):
            # Entry without a usable "<N>p" quality label.
            continue
        if qa > stream_id[0]:
            stream_id[0] = qa
            stream_id[1] = stream["url"]

    if stream_id[1] is None:
        # No usable stream: report it instead of failing on None below.
        print("Can't find progressive video stream!")
        return None

    print("Found quality: %sp" % stream_id[0])
    print("Download url:  %s" % (stream_id[1],))

    # Extension = text after the last dot, once the query string is removed.
    ext_ = re.sub("\?(.*)$", "", stream_id[1])
    ext_ = re.findall("\/.*?\.([^.]+)$", ext_)[0]
    name = re.findall("item/(.*?)$", self.url)[0]
    flname = "%s_%s.%s" % (name, id, ext_)
    print("Save as: %s" % flname)

    vkwargs['output'] = flname
    # The media file is served by another host than the player page.
    del vkwargs['headers']['Host']
    vkwargs['print_status'] = True
    CUrl.download(stream_id[1], **vkwargs)
def get_info(self, ident):
    """Request the video description from the API and return it decoded.

    Args:
        ident: video identifier inserted into the ``/api/video/`` path.

    Returns:
        The response body parsed with ``json.loads``.
    """
    endpoint = "%s/api/video/%s/" % (self.host, ident)

    request = self.curl_get_default_params()
    # Ask the API for a JSON answer.
    request['headers']['Accept'] = "application/json, text/javascript, */*; q=0.01"

    raw = CUrl.download(endpoint, 'compressed', **request)
    return json.loads(raw)
def start(self, ident):
    """Print RuTube video metadata and download the stream.

    The video is looked up with ``self.main.get_info(ident)`` and its
    balancer route is resolved with ``self.main.get_route(track_id)``.

    * For every route type except ``"default"`` the balancer URL is handed
      to ``ffmpeg``, which writes ``<sanitized title>.avi`` into the
      current directory; ffmpeg's stderr is echoed to stdout.
    * For the ``"default"`` type the route is parsed as an F4M XML manifest,
      the media entry with the highest bitrate is selected and its URL is
      fetched and printed.

    Args:
        ident: video identifier accepted by ``self.main.get_info``.

    Returns:
        None in every case; progress and failures are reported on stdout.
    """
    print("RUTUBE Mainpage...")
    info = self.main.get_info(ident)
    print("Title %s" % (info["title"],))
    print("Description %s" % (info["description"],))
    print("")
    print("Embed %s" % (info["embed_url"],))
    print("TrackID %s" % (info["track_id"],))

    ## Get streams:
    def_tp, route, bl_url = self.main.get_route(info["track_id"])
    flname = "%s.avi" % re.sub("""[\"\,\.\'\s\t\&\;\$\*]+""", "_", info["title"])

    ## Playlist route: let ffmpeg fetch and assemble the stream.
    if def_tp != "default":
        # The title and balancer URL come from the remote API, so the
        # command is passed as an argument list without a shell: nothing in
        # them can be interpreted as shell syntax.
        cmd_args = ["ffmpeg", "-i", bl_url, "-y", flname]
        cmd_ = '''ffmpeg -i "%s" -y "%s"''' % (bl_url, flname)
        print("RUNING COMMAND:  %s" % (cmd_,))
        print("")
        try:
            proc = subprocess.Popen(cmd_args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        except OSError as err:
            print("Can't run ffmpeg: %s" % (err,))
            return None

        # Forward stderr one character at a time so that ffmpeg's progress
        # output is passed on as soon as it is produced.
        while True:
            chunk = proc.stderr.read(1)
            if len(chunk) == 0:
                break
            sys.stdout.write(chunk)
        proc.stdout.read()

        # Reap the process and report success only when it succeeded.
        exit_code = proc.wait()
        print("")
        if exit_code != 0:
            print("Download failed: ffmpeg exited with code %s" % (exit_code,))
            return None
        print("Download complete!")
        print("Saved as: %s" % flname)
        return None

    ## XML data:
    print("XML %s" % (route,))
    root = ET.fromstring(route)
    url = root.find('{http://ns.adobe.com/f4m/2.0}baseURL').text.strip()

    best_brate = 0
    vfile = None
    for fl in root.findall('{http://ns.adobe.com/f4m/2.0}media'):
        br = int(fl.attrib['bitrate'])
        if br > best_brate:
            best_brate = br
            vfile = fl

    if vfile is None:
        # A manifest without media entries gives nothing to download.
        print("Can't find video format!")
        return None

    # No newline here: the "URL:" line below continues on the same line.
    sys.stdout.write("Found video format: %s x %s " % (vfile.attrib['width'], vfile.attrib['height']))
    url += vfile.attrib['href']
    print("URL:  %s" % (url,))

    ## Getting video:
    params = self.main.curl_get_default_params()
    answ = CUrl.download(url, 'compressed', **params)
    print("VIDEO %s" % (answ,))