def prepare_data(posfilpath, negfilepath, size, word2index):
    """Build index-set training samples from paired positive/negative files.

    Files are read pairwise (positive ``i``, then negative ``i``). Each
    non-empty file is lower-cased, split on single spaces, and reduced to
    the distinct ``word2index`` ids of the words it contains.

    Args:
        posfilpath: Prefix concatenated with each positive file name.
        negfilepath: Prefix concatenated with each negative file name.
        size: Requested total sample count; ``y_train`` has ``size`` rows.
        word2index: Mapping word -> id; words not in it are dropped.

    Returns:
        ``(x_train, y_train, sz)`` where ``sz`` is ``size`` capped at twice
        the smaller of the two file counts.
    """
    def _encode(text):
        # Distinct ids of the known words in ``text``.
        ids = [word2index[word] for word in text.lower().split(" ")
               if word in word2index]
        return list(set(ids))

    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)
    x_train = []
    y_train = np.zeros((size, 1))
    # Iterate over the capped count: looping over ``size / 2`` raised
    # IndexError whenever fewer files than requested were available.
    for i in range(int(sz / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            # NOTE(review): the label row is positional (2 * i) while x_train
            # only grows for non-empty files, so an empty file shifts the
            # alignment -- confirm callers never hit that case.
            y_train[2 * i][0] = 1
            x_train.append(_encode(posline))
        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            x_train.append(_encode(negline))
    return (x_train, y_train, sz)
def prepare_data(posfilpath, negfilepath, size):
    """Encode paired positive/negative files as word-id sequences.

    Files are read pairwise (positive ``i``, then negative ``i``). Each
    non-empty file is lower-cased, split on single spaces and mapped through
    the module-level ``word2index``; unknown words are dropped.

    Args:
        posfilpath: Prefix concatenated with each positive file name.
        negfilepath: Prefix concatenated with each negative file name.
        size: Requested total sample count.

    Returns:
        ``(x_train, x_concat, sz)``: per-file id lists, the same ids
        flattened in reading order, and ``size`` capped at twice the smaller
        of the two file counts.
    """
    x_train = []
    x_concat = []

    def _collect(text):
        # Record one file both as its own sequence and in the flat stream.
        ids = [word2index[word] for word in text.lower().split(" ")
               if word in word2index]
        x_concat.extend(ids)
        x_train.append(ids)

    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)
    # Iterate over the capped count: looping over ``size / 2`` raised
    # IndexError whenever fewer files than requested were available.
    for i in range(int(sz / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            _collect(posline)
        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            _collect(negline)
    return (x_train, x_concat, sz)
def get_funshion_vid(rurl):
    """Return the Funshion media id for ``rurl``.

    The id is taken from the URL itself when it matches the
    ``.../vplay/...m-<digits>`` form; otherwise the page is fetched and the
    ``"mediaid"`` field is extracted from it.
    """
    url_pattern = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(url_pattern, rurl):
        return r1(url_pattern, rurl)
    # Fixed: the original fetched an undefined name ``url``; the page to
    # download is the URL that was passed in.
    html = get_content(rurl)
    return r1(r'\"mediaid\":(\d+)', html)
def get_pps_vid(html):
    """Return the PPS video id for the page URL passed as ``html``.

    A URL of the ``play_<id>.html`` form yields the id directly; any other
    URL is fetched and the ``url_key`` value is extracted from the page.
    """
    play_pattern = r'http://v.pps.tv/play_(.*).html'
    if re.match(play_pattern, html):
        return r1(play_pattern, html)
    page = get_content(html)
    return r1(r'url_key: "(.*)",', page)
def get_urls_by_vid(vid):
    """Return the Letv play URLs for ``vid``.

    Queries the playJson API with a time-derived key and, for every entry in
    the ``dispatch`` table, emits two ``{'rate', 'furls'}`` dicts: the
    rewritten URL and a variant with the ``tss`` flag flipped between
    ``ios`` and ``no``. Each rate and URL is also printed.
    """
    tn = get_timestamp()
    key = get_key(tn)
    url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(
        vid, key)
    info = get_content(url)
    playurl = json.loads(info)['playurl']
    domain = playurl['domain'][0]
    dispatch = playurl['dispatch']

    urls = []
    for k in dispatch:
        url = dispatch[k][0]
        rate = get_rateid(k)
        url = domain + url + '&retry=1&tag=flash&sign=webdisk_19722818&termid=1&pay=0&ostype=windows&hwtype=un'
        # NOTE: 'platid=1' is also a substring of 'splatid=1...', so the
        # first replace rewrites both parameters when present.
        url = url.replace('platid=1', 'platid=14')
        url = url.replace('splatid=101', 'splatid=1401')
        # print() with a single argument behaves the same on Python 2 and 3.
        print(rate[1])
        print(url)
        urls.append({'rate': rate[1], 'furls': [url]})
        if 'tss=ios' in url:
            ano = url.replace('tss=ios', 'tss=no')
        else:
            ano = url.replace('tss=no', 'tss=ios')
        print(ano)
        urls.append({'rate': rate[1], 'furls': [ano]})
    return urls
def get_m1905_vid(html):
    """Return the 1905.com video id for the page URL passed as ``html``.

    A ``/vod/play/<id>.shtml`` URL yields the id directly; any other URL is
    fetched and the ``vid : "..."`` value is extracted from the page.
    """
    play_pattern = r"http://www.1905.com/vod/play/(.*).shtml.*"
    if re.match(play_pattern, html):
        return r1(play_pattern, html)
    page = get_content(html)
    return r1(r'vid : "(.*)",', page)
def collect(self):
    """Walk the source tree breadth-first and pass tagged files to ``self.sort``.

    Starts at ``self.parser.source``. Sub-folders whose name starts with
    '.' are not descended into. A file for which ``TinyTag.get`` raises is
    skipped: silently on ``LookupError``, with a printed message on any
    other ``Exception``.
    """
    from collections import deque

    root = self.parser.source
    home_path_len = len(root)
    # deque pops from the front in O(1); the original re-sliced the list on
    # every iteration.
    folder_queue = deque([root])
    while folder_queue:
        current_folder = folder_queue.popleft()
        relative = current_folder[home_path_len:]
        if len(relative) == 0:
            print("[+] Scan /")
        else:
            print("[+] Scan {}".format(relative))

        files, folders = get_content(current_folder)

        # Queue sub-folders by full path, skipping names that start with '.'.
        for folder in folders:
            if folder[0] != '.':
                folder_queue.append(current_folder + '/' + folder)

        for f in files:
            fp = current_folder + '/' + f  # full path to file
            try:
                tag = TinyTag.get(fp)
            except LookupError:
                continue
            except Exception:
                # Narrowed from a bare ``except:`` so Ctrl-C and SystemExit
                # are no longer swallowed.
                print("Cannot get tag from file --> Skip\n\t{}".format(fp))
                continue
            self.sort(fp, tag)
def get_letv_vid(url):
    """Return the Letv video id for ``url``.

    A ``/ptv/vplay/<digits>.html`` URL yields the id directly; any other URL
    is fetched and the ``vid="<digits>"`` attribute is extracted.
    """
    play_pattern = r'http://www.letv.com/ptv/vplay/(\d+).html'
    if re.match(play_pattern, url):
        return match1(url, play_pattern)
    page = get_content(url)
    return match1(page, r'vid="(\d+)"')
def get_urls_by_vid(vid):
    """Return the Letv play URLs for ``vid``.

    Queries the playJson API with a time-derived key and, for every entry in
    the ``dispatch`` table, emits two ``{'rate', 'furls'}`` dicts: the
    rewritten URL and a variant with the ``tss`` flag flipped between
    ``ios`` and ``no``. Each rate and URL is also printed.
    """
    tn = get_timestamp()
    key = get_key(tn)
    url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key)
    info = get_content(url)
    playurl = json.loads(info)['playurl']
    domain = playurl['domain'][0]
    dispatch = playurl['dispatch']

    urls = []
    for k in dispatch:
        url = dispatch[k][0]
        rate = get_rateid(k)
        url = domain + url + '&retry=1&tag=flash&sign=webdisk_19722818&termid=1&pay=0&ostype=windows&hwtype=un'
        # NOTE: 'platid=1' is also a substring of 'splatid=1...', so the
        # first replace rewrites both parameters when present.
        url = url.replace('platid=1', 'platid=14')
        url = url.replace('splatid=101', 'splatid=1401')
        # print() with a single argument behaves the same on Python 2 and 3.
        print(rate[1])
        print(url)
        urls.append({'rate': rate[1], 'furls': [url]})
        if 'tss=ios' in url:
            ano = url.replace('tss=ios', 'tss=no')
        else:
            ano = url.replace('tss=no', 'tss=ios')
        print(ano)
        urls.append({'rate': rate[1], 'furls': [ano]})
    return urls
def get_qq_vid(url):
    """Return the v.qq.com video id for ``url``.

    When the URL carries a ``?vid`` query the id is read from it; otherwise
    the page is fetched and the ``vid:"..."`` value is extracted.
    """
    has_vid_query = re.match(r'http://v.qq.com/([^\?]+)\?vid', url)
    if has_vid_query:
        return r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
    page = get_content(url)
    return r1(r'vid:"(.*)"', page)
def youku_ups_TV(self):
    """Query the CIBN/OTT UPS endpoint and store the response on ``self``.

    The request URL is built from, in order: vid, ccode, client_ip, utid,
    client_ts, ckey and (when ``self.password_protected``) password.

    Sets ``self.utid``, ``self.ckey``, ``self.UpsUrl`` and ``self.api_data``;
    sets ``self.api_error_code`` / ``self.api_error_msg`` when the payload
    carries an ``error`` entry, and ``self.video_list`` / ``self.video_next``
    when ``videos`` contains ``list`` / ``next``.
    """
    url = 'https://ups.cp31.ott.cibntv.net/ups/get.json?vid={}&ccode={}'.format(
        self.vid, self.ccode)
    url += '&client_ip=192.168.1.5'
    self.utid = self.fetch_cna()
    url += '&utid=' + self.utid
    url += '&client_ts=' + str(int(time.time()))
    self.ckey = '7B19C0AB12633B22E7FE81271162026020570708D6CC189E4924503C49D243A0DE6CD84A766832C2C99898FC5ED31F3709BB3CDD82C96492E721BDD381735026'
    url += '&ckey=' + urllib.parse.quote(self.ckey)  # URL-encode the key
    # NOTE(review): the source text between "'&password='" and
    # "'User-Agent']" was masked out (******). The password operand and the
    # ``headers`` initialisation below are a reconstruction -- confirm the
    # attribute names (``self.password``, ``self.referer``) before merging.
    if self.password_protected:
        url += '&password=' + self.password
    headers = dict(Referer=self.referer)
    headers['User-Agent'] = self.ua
    self.UpsUrl = url

    api_meta = json.loads(get_content(url, headers=headers))
    self.api_data = api_meta['data']
    data_error = self.api_data.get('error')
    if data_error:
        self.api_error_code = data_error.get('code')
        self.api_error_msg = data_error.get('note')
    if 'videos' in self.api_data:
        videos = self.api_data['videos']
        if 'list' in videos:
            self.video_list = videos['list']
        if 'next' in videos:
            self.video_next = videos['next']
def get_pps_vid(html):
    """Resolve a PPS video id from the page URL given as ``html``.

    The id comes from the URL when it has the ``play_<id>.html`` shape, and
    from the ``url_key`` field of the downloaded page otherwise.
    """
    url_shape = r'http://v.pps.tv/play_(.*).html'
    if not re.match(url_shape, html):
        body = get_content(html)
        return r1(r'url_key: "(.*)",', body)
    return r1(url_shape, html)
def get_m1905_vid(html):
    """Resolve a 1905.com video id from the page URL given as ``html``.

    The id comes from the URL when it has the ``/vod/play/<id>.shtml``
    shape, and from the ``vid : "..."`` field of the downloaded page
    otherwise.
    """
    url_shape = r'http://www.1905.com/vod/play/(.*).shtml.*'
    if not re.match(url_shape, html):
        body = get_content(html)
        return r1(r'vid : "(.*)",', body)
    return r1(url_shape, html)
def youku_ups(self):
    """Query the Youku UPS endpoint and store the response on ``self``.

    The request URL is built from, in order: vid, ccode, client_ip, utid,
    client_ts, ckey and (when ``self.password_protected``) password.

    Sets ``self.utid``, ``self.ckey``, ``self.UpsUrl`` and ``self.api_data``;
    sets ``self.api_error_code`` / ``self.api_error_msg`` when the payload
    carries an ``error`` entry, and ``self.video_list`` / ``self.video_next``
    when ``videos`` contains ``list`` / ``next``.
    """
    url = 'https://ups.youku.com/ups/get.json?vid={}&ccode={}'.format(
        self.vid, self.ccode)
    url += '&client_ip=192.168.1.2'
    self.utid = self.fetch_cna()
    url += '&utid=' + self.utid
    url += '&client_ts=' + str(int(time.time()))
    self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
    url += '&ckey=' + urllib.parse.quote(self.ckey)  # URL-encode the key
    # NOTE(review): the source text between "'&password='" and
    # "'User-Agent']" was masked out (******). The password operand and the
    # ``headers`` initialisation below are a reconstruction -- confirm the
    # attribute names (``self.password``, ``self.referer``) before merging.
    if self.password_protected:
        url += '&password=' + self.password
    headers = dict(Referer=self.referer)
    headers['User-Agent'] = self.ua
    self.UpsUrl = url

    api_meta = json.loads(get_content(url, headers=headers))
    self.api_data = api_meta['data']
    data_error = self.api_data.get('error')
    if data_error:
        self.api_error_code = data_error.get('code')
        self.api_error_msg = data_error.get('note')
    if 'videos' in self.api_data:
        videos = self.api_data['videos']
        if 'list' in videos:
            self.video_list = videos['list']
        if 'next' in videos:
            self.video_next = videos['next']
def get_funshion_vid(rurl):
    """Return the Funshion media id for ``rurl``.

    The id is taken from the URL itself when it matches the
    ``.../vplay/...m-<digits>`` form; otherwise the page is fetched and the
    ``"mediaid"`` field is extracted from it.
    """
    url_pattern = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(url_pattern, rurl):
        return r1(url_pattern, rurl)
    # Fixed: the original fetched an undefined name ``url``; the page to
    # download is the URL that was passed in.
    html = get_content(rurl)
    return r1(r'\"mediaid\":(\d+)', html)
def get_vkey_by_id(vid, idx, fmt):
    """Return ``<filename>?vkey=<key>`` for one clip of a QQ video.

    Fetches the getclip XML for (``vid``, ``idx``, ``fmt``) and joins the
    text of its ``vi/fn`` and ``vi/key`` elements.
    """
    clip_url = 'http://vv.video.qq.com/getclip?vid={}&idx={}&fmt={}'.format(vid, idx, fmt)
    root = ET.fromstring(get_content(clip_url))
    file_name = root.find('vi/fn').text
    clip_key = root.find('vi/key').text
    return file_name + '?vkey=' + clip_key
def get_fun_allurls(vid, playnum):
    """Return Funshion stream URLs for episode ``playnum`` of media ``vid``.

    The media JSON is scanned for episode numbers and for the ``tv``,
    ``dvd`` and ``highdvd`` URL lists. When the episode-number list lines up
    with the ``tv`` list, the entry at the position of ``playnum`` is used;
    otherwise (or when ``playnum`` is not found) the first entry is used.

    Returns:
        A list of ``{'rate', 'furls'}`` dicts, one per quality that has an
        entry at the chosen position; empty when no episode numbers exist.
    """
    info = get_content('http://jsonfe.funshion.com/media/?cli=ipad&ver=2.0.0.1&ta=0&mid={}'.format(vid))
    number = r2(r'"number":"(\d*)",', info)
    # %-formatting keeps the output identical on Python 2 and 3.
    print('%s %s' % (len(number), number))

    tvurl = r2('\"tv\":{\"url\":\"(.*?)\"', info)
    dvdurl = r2('\"dvd\":{\"url\":\"(.*?)\"', info)
    highdvd = r2('\"highdvd\":{\"url\":\"(.*?)\"', info)
    streams = (('tv', tvurl), ('dvd', dvdurl), ('highdvd', highdvd))
    for _, found in streams:
        print('%s %s' % (len(found), found))

    if len(number) == 0:
        return []

    # Position 0 is the fallback both when the lists do not line up and when
    # playnum is absent from the episode list.
    pos = 0
    if len(number) == len(tvurl):
        for i, episode in enumerate(number):
            if episode == playnum:
                pos = i
                break

    urls = []
    for quality, found in streams:
        if pos < len(found):
            urls.append({
                'rate': get_clarity(quality),
                # The API escapes slashes; strip the backslashes.
                'furls': [found[pos].replace('\\', '')],
            })
    return urls
def get_letv_vid(url):
    """Resolve a Letv video id from ``url``.

    The id comes from the URL when it has the ``/ptv/vplay/<digits>.html``
    shape, and from the ``vid="<digits>"`` attribute of the downloaded page
    otherwise.
    """
    url_shape = r'http://www.letv.com/ptv/vplay/(\d+).html'
    if not re.match(url_shape, url):
        body = get_content(url)
        return match1(body, r'vid="(\d+)"')
    return match1(url, url_shape)
def getVMS(tvid, vid, uid):
    """Fetch and decode the iQiyi VMS JSON for a video.

    The request carries a random ``tm`` in [1000, 2000], an ``enc`` value
    that is the MD5 hex digest of ``'ts56gh' + tm + tvid``, the caller's
    ``uid`` as ``qyid`` and a random ``tn``.
    """
    tm = randint(1000, 2000)
    enc = hashlib.new('md5', bytes('ts56gh' + str(tm) + tvid)).hexdigest()
    query = "&tvId=" + tvid + "&vid=" + vid + "&vinfo=1&tm=" + str(tm)
    query += "&enc=" + enc + "&qyid=" + uid + "&tn=" + str(random())
    vmsreq = 'http://cache.video.qiyi.com/vms?key=fvip&src=p' + query
    return json.loads(get_content(vmsreq))
def get_m1905_m3u8(vid):
    """Return the decoded iOS (m3u8) URL for a 1905.com video, or None.

    Fetches the media-info API for ``vid``, extracts the ``"iosurl"`` value
    and base64-decodes it. Any failure is printed and yields ``None``.
    """
    try:
        url = "http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad".format(vid)
        con = get_content(url)
        m3url = r1(r'"iosurl":"(.*?)",', con)
        # b64decode replaces base64.decodestring, which was removed in
        # Python 3.9; on Python 3 the result is bytes.
        m3u8 = base64.b64decode(m3url)
    except Exception as e:
        print(e)
        m3u8 = None
    # Fixed: the value was computed but never returned.
    return m3u8
def get_content(self):
    """Describe the current request via ``common.get_content``.

    Passes the protocol name (``proto[0]``), version/method/path info pairs,
    the connection's peer and local socket addresses, and the request
    headers.
    """
    version_line = "%s -> %s" % (self.request_version, self.server_version)
    infos = [
        ("Version", version_line),
        ("Method", self.command),
        ("Path", self.path),
    ]
    return common.get_content(
        proto[0],
        infos,
        self.connection.getpeername(),
        self.connection.getsockname(),
        self.headers,
    )
def get_vid(url):
    """Return the Sohu video id found in the page at ``url``.

    Prefers the id embedded in a ``share.vrs.sohu.com/<id>/`` link and falls
    back to the ``vid="...";`` assignment when no such link is present.
    """
    html = get_content(url)
    # An explicit "no match" check replaces the bare ``except:`` that was
    # used as control flow and also hid unrelated errors.
    match = re.search("share.vrs.sohu.com/(.*?)/", html)
    if match is not None:
        return match.group(1)
    return r1(r'vid="(.*)";', html)
def get_vkey_by_id(vid, idx, fmt):
    """Build the ``<filename>?vkey=<key>`` suffix for one QQ video clip.

    The getclip XML for (``vid``, ``idx``, ``fmt``) supplies the file name
    (``vi/fn``) and the key (``vi/key``).
    """
    request = 'http://vv.video.qq.com/getclip?vid={}&idx={}&fmt={}'.format(
        vid, idx, fmt)
    tree = ET.fromstring(get_content(request))
    name_text = tree.find('vi/fn').text
    key_text = tree.find('vi/key').text
    return name_text + '?vkey=' + key_text
def get_vid_from_page(self):
    """Download ``self.url`` and extract the video id from the page.

    Stores the page text in ``self.page``. When the page contains
    ``videoId2: '<id>'`` the captured id is stored in ``self.vid``;
    otherwise ``self.vid`` is left untouched.

    Raises:
        Exception: if ``self.url`` is empty or unset ('No url').
    """
    if not self.url:
        raise Exception('No url')
    self.page = get_content(self.url)
    # The pattern was previously bound to a local named ``str``, shadowing
    # the builtin; the unused ``b64p`` local is dropped.
    hit = re.search('videoId2: \'(.+)\'', self.page)
    if hit is not None:
        self.vid = hit.group(1)
def get_vid(url):
    """Return the Sohu video id found in the page at ``url``.

    Prefers the id embedded in a ``share.vrs.sohu.com/<id>/`` link and falls
    back to the ``vid="...";`` assignment when no such link is present.
    """
    html = get_content(url)
    # An explicit "no match" check replaces the bare ``except:`` that was
    # used as control flow and also hid unrelated errors.
    match = re.search("share.vrs.sohu.com/(.*?)/", html)
    if match is not None:
        return match.group(1)
    return r1(r'vid="(.*)";', html)
def get_news_url_by_vid(vid):
    """Return the iPad playback URL for a Sina news video, or None.

    Looks up ``ipad_vid`` for ``vid`` through the video_ids interface and
    builds the ``v_play_ipad.php`` URL from it. Any failure is printed and
    yields ``None``.
    """
    try:
        url = "http://video.sina.com.cn/interface/video_ids/video_ids.php?v={}".format(vid)
        html = json.loads(get_content(url))
        newsvid = html["ipad_vid"]
        url = "http://v.iask.com/v_play_ipad.php?vid={}&tags=newsList_web".format(newsvid)
    except Exception as e:
        print(e)
        url = None
    # Fixed: the value was computed but never returned.
    return url
def get_m1905_m3u8(vid):
    """Return the decoded iOS (m3u8) URL for a 1905.com video, or None.

    Fetches the media-info API for ``vid``, extracts the ``"iosurl"`` value
    and base64-decodes it. Any failure is printed and yields ``None``.
    """
    try:
        url = 'http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad'.format(vid)
        con = get_content(url)
        m3url = r1(r'"iosurl":"(.*?)",', con)
        # b64decode replaces base64.decodestring, which was removed in
        # Python 3.9; on Python 3 the result is bytes.
        m3u8 = base64.b64decode(m3url)
    except Exception as e:
        print(e)
        m3u8 = None
    # Fixed: the value was computed but never returned.
    return m3u8
def get_funshion_playnum(rurl):
    """Return the episode number for a Funshion play URL.

    Tries the ``e-<digits>`` part of the URL first, then the
    ``minfo.playNumber`` assignment in the fetched page, and finally falls
    back to the integer ``1``. Intermediate and final values are printed.
    """
    # Raw strings keep the same patterns without relying on unrecognised
    # escape sequences such as '\d' in plain literals.
    playNum = r1(r'http://www.fun.tv/vplay/.*m-\d+.e-(\d+)', rurl)
    # %-formatting keeps the output identical on Python 2 and 3.
    print('playNum %s' % (playNum,))
    if playNum is None:
        html = get_content(rurl)
        playNum = r1(r"minfo.playNumber = '(\d+)';", html)
        if playNum is None:
            playNum = 1
    print('playNum2 %s' % (playNum,))
    return playNum
def get_funshion_playnum(rurl):
    """Return the episode number for a Funshion play URL.

    Tries the ``e-<digits>`` part of the URL first, then the
    ``minfo.playNumber`` assignment in the fetched page, and finally falls
    back to the integer ``1``. Intermediate and final values are printed.
    """
    # Raw strings keep the same patterns without relying on unrecognised
    # escape sequences such as '\d' in plain literals.
    playNum = r1(r'http://www.fun.tv/vplay/.*m-\d+.e-(\d+)', rurl)
    # %-formatting keeps the output identical on Python 2 and 3.
    print('playNum %s' % (playNum,))
    if playNum is None:
        html = get_content(rurl)
        playNum = r1(r"minfo.playNumber = '(\d+)';", html)
        if playNum is None:
            playNum = 1
    print('playNum2 %s' % (playNum,))
    return playNum
def get_video(vid, stream_type=None):
    """Return ``(url, size)`` pairs for every segment of a Youku video.

    Args:
        vid: Youku video id used in the getPlayList request.
        stream_type: One of 'hd3', 'hd2', 'mp4', 'flv'. When falsy, the
            first of those (in that order) present in the playlist is used.

    Returns:
        List of ``(segment_url, size_in_bytes)`` tuples. The chosen stream
        type, the query string from ``get_segurls_param`` and every segment
        URL with that query appended are printed along the way.

    Raises:
        NotImplementedError: no known stream type is offered.
        AssertionError: ``stream_type`` is not one of the known types.
    """
    url = "http://v.youku.com/player/getPlayList/VideoIDS/{}/Pf/4/ctype/12/ev/1".format(
        vid)
    info = json.loads(get_content(url))
    data = info['data'][0]
    segs = data['segs']

    if not stream_type:
        for x in ['hd3', 'hd2', 'mp4', 'flv']:
            if x in segs:
                stream_type = x
                break
        else:
            raise NotImplementedError()
    assert stream_type in ('hd3', 'hd2', 'mp4', 'flv')
    # %-formatting keeps the output identical on Python 2 and 3.
    print('stream_type %s' % (stream_type,))
    file_type = {
        'hd3': 'flv',
        'hd2': 'flv',
        'mp4': 'mp4',
        'flv': 'flv'
    }[stream_type]

    # Seeded shuffle of the alphabet: each step advances a 16-bit linear
    # congruential value and pops the character it selects.
    seed = data['seed']
    source = list(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890"
    )
    mixed_chars = []
    while source:
        seed = (seed * 211 + 30031) & 0xFFFF
        index = seed * len(source) >> 16
        mixed_chars.append(source.pop(index))
    mixed = ''.join(mixed_chars)

    # The stream's file id is a '*'-terminated list of indices into the
    # shuffled alphabet.
    ids = data['streamfileids'][stream_type].split('*')[:-1]
    vid = ''.join(mixed[int(i)] for i in ids)

    sid = '%s%s%s' % (int(time.time() * 1000), randint(
        1000, 1999), randint(1000, 9999))
    ep = data['ep']
    ip = data['ip']
    query = get_segurls_param(ep, ip)
    print(query)

    urls = []
    for s in segs[stream_type]:
        no = '%02x' % int(s['no'])
        # The segment number replaces characters 8-9 of the file id.
        url = 'http://k.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (
            sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'],
            s['seconds'])
        print(url + '&' + query)
        urls.append((url, int(s['size'])))
    return urls
def get_video_flv_complete_by_id(vid):
    """Return the single-file FLV URL for a QQ video, or None on failure.

    Reads the getinfo XML for ``vid`` and joins the server base URL
    (``vl/vi/ul/ui/url``), the file name (``vl/vi/fn``) and the key
    (``vl/vi/fvkey``). Any failure is printed and yields ``None``.
    """
    try:
        info = get_content('http://vv.video.qq.com/getinfo?vids={}&otype=xml&defaultfmt=flv'.format(vid))
        root = ET.fromstring(info)
        fn = root.find('vl/vi/fn').text
        fvkey = root.find('vl/vi/fvkey').text
        ui = root.find('vl/vi/ul/ui/url').text
        url = ui + fn + '?vkey=' + fvkey
    except Exception as e:
        print(e)
        url = None
    # Fixed: the value was computed but never returned.
    return url
def get_pps_urls_by_id(vid):
    """Collect PPS play URLs for ``vid`` across stream types 0 and 1.

    A response is kept only when it contains 'pfv'; the part before
    ``&all`` becomes the single URL of a ``{'rate', 'furls'}`` entry.
    """
    results = []
    for type_id in range(0, 2):
        response = get_content('http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'.format(vid, type_id))
        if 'pfv' not in response:
            continue
        play_url = r1(r'(.*)&all.*', response)
        entry = {}
        entry['rate'] = get_pps_rate(str(type_id))
        entry['furls'] = [play_url]
        results.append(entry)
    return results
def get_news_url_by_vid(vid):
    """Return the iPad playback URL for a Sina news video, or None.

    Looks up ``ipad_vid`` for ``vid`` through the video_ids interface and
    builds the ``v_play_ipad.php`` URL from it. Any failure is printed and
    yields ``None``.
    """
    try:
        url = 'http://video.sina.com.cn/interface/video_ids/video_ids.php?v={}'.format(
            vid)
        html = json.loads(get_content(url))
        newsvid = html['ipad_vid']
        url = 'http://v.iask.com/v_play_ipad.php?vid={}&tags=newsList_web'.format(
            newsvid)
    except Exception as e:
        print(e)
        url = None
    # Fixed: the value was computed but never returned.
    return url
def get_urls(html):
    """Return the parsed Wasu play URL for the page URL passed as ``html``.

    Resolves the video id and URL suffix, queries the getVideoUrl API and
    reads the ``video`` element of the XML reply. On any failure the URL is
    the literal string 'Error'. The result is a single-entry list passed
    through ``parser2dic``.
    """
    try:
        vid = get_wasu_id(html)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(vid)
        suffix = get_suffix_by_html(html)
        url = 'http://www.wasu.cn/Api/getVideoUrl/id/' + vid + suffix
        info = get_content(url)
        root = ET.fromstring(info)
        url = root.find('video').text
        print(url)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate; ordinary failures keep the 'Error' marker.
        url = 'Error'
    return parser2dic([{'rate': '标清', 'furls': [url]}])
def get_video_mp4_complete_by_id(vid):
    """Return the single-file MP4 URL for a QQ video, or None on failure.

    Reads the getinfo XML for ``vid`` and joins the server base URL
    (``vl/vi/ul/ui/url``), the file name (``vl/vi/fn``) and the key
    (``vl/vi/fvkey``). Any failure is printed and yields ``None``.
    """
    try:
        info = get_content(
            'http://vv.video.qq.com/getinfo?vids={}&otype=xml&defaultfmt=mp4'.
            format(vid))
        root = ET.fromstring(info)
        fn = root.find('vl/vi/fn').text
        fvkey = root.find('vl/vi/fvkey').text
        ui = root.find('vl/vi/ul/ui/url').text
        url = ui + fn + '?vkey=' + fvkey
    except Exception as e:
        print(e)
        url = None
    # Fixed: the value was computed but never returned.
    return url
def get_pps_urls_by_id(vid):
    """Gather PPS play URLs for ``vid`` by probing stream types 0 and 1.

    Only responses containing 'pfv' are used; everything before ``&all`` is
    stored as the sole URL of a ``{'rate', 'furls'}`` dict.
    """
    collected = []
    for kind in (0, 1):
        reply = get_content(
            'http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'
            .format(vid, kind))
        if 'pfv' in reply:
            address = r1(r'(.*)&all.*', reply)
            item = {}
            item['rate'] = get_pps_rate(str(kind))
            item['furls'] = [address]
            collected.append(item)
    return collected
def get_urls(html):
    """Return the parsed Wasu play URL for the page URL passed as ``html``.

    Resolves the video id and URL suffix, queries the getVideoUrl API and
    reads the ``video`` element of the XML reply. On any failure the URL is
    the literal string "Error". The result is a single-entry list passed
    through ``parser2dic``.
    """
    try:
        vid = get_wasu_id(html)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(vid)
        suffix = get_suffix_by_html(html)
        url = "http://www.wasu.cn/Api/getVideoUrl/id/" + vid + suffix
        info = get_content(url)
        root = ET.fromstring(info)
        url = root.find("video").text
        print(url)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate; ordinary failures keep the "Error" marker.
        url = "Error"
    return parser2dic([{"rate": "标清", "furls": [url]}])
def get_content(s, request, context):
    """Describe the current gRPC call via ``common.get_content``.

    The "Method" entry is the name of the function that called this one;
    headers are taken from the context's invocation metadata.
    """
    # Must stay directly in this function body: the frame index is relative
    # to the current frame.
    caller_name = sys._getframe(1).f_code.co_name
    headers = {item.key: item.value for item in context.invocation_metadata()}
    infos = [
        ("Service", "-helloworld.Greeter"),
        ("Method", caller_name),
    ]
    return common.get_content("grpc", infos, context.peer(), get_host_ip(), headers)
def get_video_sections_by_id(vid, fmt):
    """Return the URLs of every section of a QQ video, or None on failure.

    Reads the section count (``vl/vi/cl/fc``) and server base URL
    (``vl/vi/ul/ui/url``) from the getinfo XML, then appends the suffix from
    ``get_vkey_by_id`` for sections 1..count. Any failure is printed and
    yields ``None``.
    """
    try:
        xml = get_content('http://vv.video.qq.com/getinfo?vids=%s' % vid +'&defaultfmt=%s' % fmt)
        root = ET.fromstring(xml)
        num = root.find('vl/vi/cl/fc').text
        ui = root.find('vl/vi/ul/ui/url').text
        urls = []
        for i in range(1, int(num) + 1):
            urls.append(ui + get_vkey_by_id(vid, i, fmt))
    except Exception as e:
        print(e)
        urls = None
    # Fixed: the value was computed but never returned.
    return urls
def prepare_data(posfilpath, negfilepath, size):
    """Vectorise paired positive/negative review files.

    For each index below ``size / 2`` the positive file and then the
    negative file are read; non-empty text is lower-cased, split on single
    spaces and passed to ``makesentvec``.

    Returns:
        ``(reviews, raw_sent)``: the sentence vectors and the matching
        lower-cased texts, in reading order.
    """
    positive_names = common.get_files(posfilpath)
    negative_names = common.get_files(negfilepath)
    sources = ((posfilpath, positive_names), (negfilepath, negative_names))

    reviews, raw_sent = [], []
    for idx in range(int(size / 2)):
        for prefix, names in sources:
            text = common.get_content(prefix + names[idx])
            if text == "":
                continue
            text = text.lower()
            reviews.append(makesentvec(text.split(" ")))
            raw_sent.append(text)
    return (reviews, raw_sent)
def get_real_urls(video_links,gen_uid,info):
    """Resolve each iQiyi segment link to its real download URL.

    Links not starting with '/' are decoded first. The dispatch key derived
    from the file's base name is inserted before the last component of the
    ``du`` base URL, and the ``l`` field of the JSON reply is collected.
    """
    resolved = []
    for segment in video_links:
        link = segment["l"]
        if not link.startswith("/"):
            # Link is encoded; decode it first.
            link = getVrsEncodeCode(link)
        # Assumed to apply to every link, not only decoded ones (the
        # flattened source does not show the indentation) -- TODO confirm.
        assert link.endswith(".f4v")
        file_stem = link.split("/")[-1].split(".")[0]
        key = getDispathKey(file_stem)
        base_parts = info["data"]["vp"]["du"].split("/")
        base_parts.insert(-1, key)
        request = "/".join(base_parts) + link + '?su=' + gen_uid + '&client=&z=&bt=&ct=&tn=' + str(randint(10000,20000))
        reply = json.loads(get_content(request))
        resolved.append(reply["l"])
    return resolved
def get_kankan_mparam(gcid,param):
    """Return one field of the Kankan CDN resource info for ``gcid``.

    ``param`` selects 'param1' or 'param2'; 'url' and any other value
    return ``http://<ip>/<path>``. All fields are extracted up front.
    """
    info = get_content('http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'.format(gcid))
    host = r1(r'ip:"(.*?)"',info)
    resource_path = r1(r'path:"(.*?)"',info)
    url = 'http://' + host +'/'+ resource_path
    first = r1(r'param1:(.*),',info)
    second = r1(r'param2:(.*)}',info)
    if param == 'param1':
        return first
    if param == 'param2':
        return second
    return url
def get_video_sections_by_id(vid, fmt):
    """Return the URLs of every section of a QQ video, or None on failure.

    Reads the section count (``vl/vi/cl/fc``) and server base URL
    (``vl/vi/ul/ui/url``) from the getinfo XML, then appends the suffix from
    ``get_vkey_by_id`` for sections 1..count. Any failure is printed and
    yields ``None``.
    """
    try:
        xml = get_content('http://vv.video.qq.com/getinfo?vids=%s' % vid +
                          '&defaultfmt=%s' % fmt)
        root = ET.fromstring(xml)
        num = root.find('vl/vi/cl/fc').text
        ui = root.find('vl/vi/ul/ui/url').text
        urls = []
        for i in range(1, int(num) + 1):
            urls.append(ui + get_vkey_by_id(vid, i, fmt))
    except Exception as e:
        print(e)
        urls = None
    # Fixed: the value was computed but never returned.
    return urls
def get_kankan_mparam(gcid, param):
    """Look up one field of the Kankan CDN resource info for ``gcid``.

    'param1' and 'param2' select those fields; 'url' and every other value
    of ``param`` give ``http://<ip>/<path>``. Every field is parsed before
    the selection is made.
    """
    info = get_content(
        'http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'.format(gcid))
    server_ip = r1(r'ip:"(.*?)"', info)
    file_path = r1(r'path:"(.*?)"', info)
    full_url = 'http://' + server_ip + '/' + file_path
    value1 = r1(r'param1:(.*),', info)
    value2 = r1(r'param2:(.*)}', info)

    if param == 'param1':
        selected = value1
    elif param == 'param2':
        selected = value2
    else:
        selected = full_url
    return selected
def get_m1905_urls(vid):
    """Collect playable URLs for a 1905.com video.

    Adds the m3u8 URL when ``get_m1905_m3u8`` provides one, then every
    ``url`` / ``sdurl`` / ``bkurl`` / ``hdurl`` attribute of the
    ``playlist/item`` element in the static profile XML.
    """
    wanted = ["url", "sdurl", "bkurl", "hdurl"]
    urls = []

    m3u8url = get_m1905_m3u8(vid)
    if m3u8url is not None:
        urls.append({"rate": "标清", "furls": [m3u8url]})

    # The first two digits of the id pick the directory of the profile XML.
    first_digit = r1(r"(\d).*", vid)
    second_digit = r1(r"\d(\d).*", vid)
    profile = get_content("http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml".format(first_digit, second_digit, vid))
    attributes = ET.fromstring(profile).find("playlist/item").attrib
    for name in attributes:
        if name not in wanted:
            continue
        entry = {}
        entry["rate"] = get_clarity(name)
        entry["furls"] = [attributes[name]]
        urls.append(entry)
    return urls
def get_m1905_urls(vid):
    """Gather playable URLs for a 1905.com video.

    The m3u8 URL (when available) comes first, followed by the ``url``,
    ``sdurl``, ``bkurl`` and ``hdurl`` attributes found on the
    ``playlist/item`` element of the static profile XML.
    """
    collected = []
    stream_url = get_m1905_m3u8(vid)
    if stream_url is not None:
        collected.append({'rate':'标清','furls':[stream_url]})

    # The first two digits of the id pick the directory of the profile XML.
    fir = r1(r'(\d).*',vid)
    sec = r1(r'\d(\d).*',vid)
    xml_text = get_content('http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml'.format(fir,sec,vid))
    item_attrs = ET.fromstring(xml_text).find('playlist/item').attrib
    for attr_name in item_attrs:
        if attr_name in ['url','sdurl','bkurl','hdurl']:
            record = {}
            record['rate'] = get_clarity(attr_name)
            record['furls'] = [item_attrs[attr_name]]
            collected.append(record)
    return collected
def get_real_urls(video_links, gen_uid, info):
    """Turn iQiyi segment links into their real download URLs.

    Links that do not start with '/' are decoded first. For each link the
    dispatch key for the file's base name is spliced in before the last
    component of the ``du`` base URL, the request is fetched, and the ``l``
    field of the JSON reply is kept.
    """
    real_urls = []
    for entry in video_links:
        path = entry["l"]
        if not path.startswith("/"):
            # Link is encoded; decode it first.
            path = getVrsEncodeCode(path)
        # Assumed to apply to every link, not only decoded ones (the
        # flattened source does not show the indentation) -- TODO confirm.
        assert path.endswith(".f4v")
        stem = path.split("/")[-1].split(".")[0]
        key = getDispathKey(stem)
        pieces = info["data"]["vp"]["du"].split("/")
        pieces.insert(-1, key)
        prefix = "/".join(pieces)
        tn = str(randint(10000, 20000))
        target = prefix + path + '?su=' + gen_uid + '&client=&z=&bt=&ct=&tn=' + tn
        real_urls.append(json.loads(get_content(target))["l"])
    return real_urls
def get_cntv_urls_by_id(pid):
    """Collect CNTV play URLs for ``pid``.

    The HLS URL (when non-empty) comes first, followed by one entry per
    chapter list (``chapters2``, ``lowChapters``, ``chapters``) present in
    the video info, each holding that list's segment URLs.
    """
    chapter_keys = ["chapters2","lowChapters","chapters"]
    info = json.loads(get_content('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + pid))

    urls = []
    hls_url = info['hls_url']
    if hls_url != '':
        urls.append({'rate':'标清','furls':[hls_url]})

    video = info['video']
    for name in video.keys():
        if name not in chapter_keys:
            continue
        segments = video[name]
        entry = {}
        entry['rate'] = get_clarity(name)
        entry['furls'] = [segment['url'] for segment in segments]
        urls.append(entry)
    return urls
def get_urls(url):
    """Collect Sina play URLs for ``url`` from three sources.

    In order: the news-video URL (when a news id resolves), the iPad URL
    list (when the page has an iPad id), and one entry per segment id whose
    rate comes from the module-level ``rate`` table. The list is passed
    through ``parser2dic``.
    """
    found = []

    newsvid = get_newsvid(url)
    if newsvid is not None:
        newsurl = get_news_url_by_vid(newsvid)
        if newsurl is not None:
            found.append({"rate": "标清", "furls": [newsurl]})

    html = get_content(url)

    ipadvid = get_ipadvid(html)
    if ipadvid is not None:
        ipadurl = get_urls_by_vid(ipadvid)
        if ipadurl is not None:
            found.append({"rate": "标清", "furls": ipadurl})

    segvids = get_segvids(html)
    if segvids is not None:
        for position, segvid in enumerate(segvids):
            segment_urls = get_urls_by_vid(segvid)
            if segment_urls is not None:
                found.append({"rate": rate[position], "furls": segment_urls})

    return parser2dic(found)
def get_cntv_urls_by_id(pid):
    """Gather CNTV play URLs for ``pid``.

    A non-empty HLS URL is listed first. Each of the ``chapters2``,
    ``lowChapters`` and ``chapters`` lists found in the video info then
    contributes one entry with the URLs of its segments.
    """
    response = get_content('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + pid)
    info = json.loads(response)

    collected = []
    hls_url = info['hls_url']
    if hls_url != '':
        collected.append({'rate': '标清', 'furls': [hls_url]})

    video = info['video']
    for key in video.keys():
        if key in ["chapters2", "lowChapters", "chapters"]:
            chapter = video[key]
            record = {}
            record['rate'] = get_clarity(key)
            record['furls'] = [part['url'] for part in chapter]
            collected.append(record)
    return collected
def get_urls(url):
    """Resolve ``url`` through flvcd.com for three formats.

    For each index 0..2 the parse page is requested with ``form[i]``. A
    multi-part result (list of links) is used as-is; otherwise a single link
    is looked for. Found links are tagged with ``rate[i]`` and the list is
    passed through ``parser2dic``. Progress is printed.
    """
    urls = []
    for i in range(0, 3):
        flvcdurl = "http://www.flvcd.com/parse.php?format={}&kw={}".format(form[i], quote(url))
        content = get_content(flvcdurl)
        furls = r2('<BR><a href=\"(.*?)\" target=', content)
        if furls != []:
            # print() calls below are valid on both Python 2 and 3.
            print('flvcd multi')
            urls.append({'rate': rate[i], 'furls': furls})
        else:
            print('flvcd single')
            sfurls = r1('<br>.*?<a href=\"(.*?)\" target', content)
            print('sfurls %s' % (sfurls,))
            if sfurls is not None:
                urls.append({'rate': rate[i], 'furls': [sfurls]})
    return parser2dic(urls)
def get_iqiyi_urls(url):
    """Start one ``getUrls`` worker per stream of an iQiyi video.

    Reads the tvid and videoid from the page, fetches the VMS info and, for
    every entry of ``data.vp.tkl[0].vs``, starts a ``getUrls`` worker with
    that stream's links, the generated uid, the info and the bid. Any
    failure while doing so is printed.

    NOTE(review): the visible code neither waits for the workers nor returns
    anything -- confirm how callers obtain the results.
    """
    threads = []
    gen_uid = uuid4().hex
    html = get_content(url)
    tvid = r1(r'data-player-tvid="([^"]+)"', html)
    videoid = r1(r'data-player-videoid="([^"]+)"', html)
    assert tvid
    assert videoid
    info = getVMS(tvid, videoid, gen_uid)
    bids = []
    videos = []
    try:
        for stream in info["data"]["vp"]["tkl"][0]["vs"]:
            bids.append(int(stream["bid"]))
            videos.append(stream["fs"])
        for i, (video_links, bid) in enumerate(zip(videos, bids)):
            worker = getUrls(i, [video_links, gen_uid, info, str(bid)])
            threads.append(worker)
            worker.start()
    except Exception as e:
        # 'as' form and print() are valid on both Python 2.6+ and 3.
        print(e)
def get_timestamp():
    """Return the ``stime`` value reported by the Letv time API.

    A random ``tn`` query value is sent with the request.
    """
    time_url = 'http://api.letv.com/time?tn={}'.format(random.random())
    payload = json.loads(get_content(time_url))
    return payload['stime']
def getVMS(tvid,vid,uid):
    """Request the iQiyi VMS description of a video and return the JSON.

    ``tm`` is a random integer in [1000, 2000]; ``enc`` is the MD5 hex
    digest of ``'ts56gh' + tm + tvid``; ``uid`` is sent as ``qyid`` and a
    random float as ``tn``.
    """
    tm = randint(1000,2000)
    tm_text = str(tm)
    signature = hashlib.new('md5',bytes('ts56gh'+tm_text+tvid)).hexdigest()
    vmsreq = 'http://cache.video.qiyi.com/vms?key=fvip&src=p'
    vmsreq += "&tvId="+tvid
    vmsreq += "&vid="+vid
    vmsreq += "&vinfo=1&tm="+tm_text
    vmsreq += "&enc="+signature
    vmsreq += "&qyid="+uid
    vmsreq += "&tn="+str(random())
    return json.loads(get_content(vmsreq))
def getDispathKey(rid):
    """Return the MD5 dispatch key for resource id ``rid``.

    The key hashes the server time (``t`` from the time endpoint) bucketed
    into 10-minute units, a fixed salt and ``rid``.
    """
    salt = ")(*&^flash@#$%a"  # magic from swf
    time_url = "http://data.video.qiyi.com/t?tn=" + str(random())
    server_time = json.loads(get_content(time_url))["t"]
    bucket = str(int(floor(int(server_time) / (10 * 60.0))))
    return hashlib.new("md5", bytes(bucket + salt + rid)).hexdigest()
def get_suffix_by_html(html):
    """Build the ``/url/<playUrl>/key/<playKey>`` suffix for a Wasu page.

    ``html`` is the page URL; both values are read from the ``_playUrl`` and
    ``_playKey`` assignments in the fetched page.
    """
    page = get_content(html)
    play_url = r1(r"_playUrl = \'(.*?)\',", page)
    play_key = r1(r"_playKey = \'(.*?)\',", page)
    return "/url/" + play_url + "/key/" + play_key
def get_texts(self):
    """Yield ``common.get_content(line)`` for each non-blank input line.

    Lines of the file at ``self.input`` that are empty or whitespace-only
    are skipped.
    """
    with open(self.input) as handle:
        for line in handle:
            if line.strip():
                yield common.get_content(line)
def get_gcid(html):
    """Return the gcid path component of the sandai.net mp4 link.

    ``html`` is the page URL; the link is searched for in the fetched page.
    """
    page = get_content(html)
    return r1(r'http://pubnet.sandai.net:8080/\d+/(.*?)/.*?.mp4', page)