def getVideoByUrl(url):
    """Resolve a Sohu TV page URL to the real clip URLs of one quality.

    The page is scanned for the video id, the id is looked up through
    ``vrs_flash.action``, and the qualities ``norVid``, ``highVid``,
    ``superVid`` and ``oriVid`` are tried in that order.

    Returns the list of clip URLs for the first quality that resolves,
    an empty list when no quality resolves, or ``None`` when the page,
    the video id or the stream-type listing cannot be obtained.
    """
    # e.g. url = r'http://tv.sohu.com/20140904/n404054190.shtml'
    html = getHtml(url)
    if not html:
        return None

    vid = r1(r'share.vrs.sohu.com/(.*?)/', html)
    if not vid:
        # NOTE(review): r1 presumably yields None on a regex miss; without
        # this guard the string concatenation below raises TypeError.
        return None

    content = getHtml("http://hot.vrs.sohu.com/vrs_flash.action?vid=" + vid)
    if not content:
        return None
    jsonContent = json.loads(content)['data']

    for streamType in ("norVid", "highVid", "superVid", "oriVid"):
        streamType_id = jsonContent[streamType]
        try:
            allot, prot, clipsURL, su = getPram(streamType_id)
            if len(clipsURL) != len(su):
                # Clip list and "su" list must pair up one-to-one.
                continue
            video = []
            for clip, new in zip(clipsURL, su):
                allot_url = "http://%s/?prot=%s&file=%s&new=%s" % (allot, prot, clip, new)
                prefix, key = getKey(allot_url)
                # The prefix's last character is dropped before appending "su".
                video.append("%s%s?key=%s" % (prefix[0:-1], new, key))
        except Exception:
            # Best effort: a quality that fails to resolve is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        if video:
            # Only the first resolved quality was ever returned, so stop
            # here instead of resolving the remaining qualities for nothing.
            return video
    return []
def getVideoByUrl(url):
    """Return the media URL found on the page at *url*, if any.

    The page markup is searched for a ``<source src="...">`` tag first and
    an ``audio src="..."`` attribute second; when neither yields a value
    the lookup is delegated to ``getVideoInfoByUrl(url)``.  ``None`` is
    returned when the page itself cannot be fetched.
    """
    page = getHtml(url)
    if not page:
        return None

    # Video markup first, then audio; the generic lookup is the last resort.
    for pattern in (r'<source src="(.*?)"', r'audio src="(.*?)"'):
        found = r1(pattern, page)
        if found:
            return found
    return getVideoInfoByUrl(url)
def getVideoByVid(vid):
    """Look up *vid* through the vv.video.qq.com ``geturl`` endpoint.

    Returns the text of the first ``<url>...</url>`` element in the
    response, or ``None`` when the response is empty.
    """
    response = getHtml('http://vv.video.qq.com/geturl?otype=xml&platform=1&vid=%s&format=2' % vid)
    if not response:
        return None
    return r1(r'<url>(.*?)</url>', response)
def getPram(streamType_id):
    """Fetch the ``vrs_flash.action`` record for one stream id.

    Returns the tuple ``(allot, prot, clipsURL, su)`` taken from the decoded
    JSON; ``clipsURL`` and ``su`` are read from its ``data`` member.  Any
    fetch, decode or missing-key error propagates to the caller.
    """
    info = json.loads(getHtml("http://hot.vrs.sohu.com/vrs_flash.action?vid=" + str(streamType_id)))
    return (
        info['allot'],
        info['prot'],
        info['data']["clipsURL"],
        info['data']["su"],
    )
def download(self, url, filepath):
    """Download the video behind *url* into *filepath*.

    The m3u8 playlist referenced by the page is saved as ``<name>.m3u8``,
    then the URL derived from that saved playlist is saved as
    ``<name>.mp4``.
    """
    # Get the name used for both output files.
    name = self.getName(url)
    playlist_file = name + '.m3u8'

    page = common.getHtml(url)
    playlist_url = self.getM3u8(page)
    common.download(urllib.unquote(playlist_url), filepath, playlist_file)

    # The saved playlist is located by plain concatenation of directory and
    # file name, exactly as the original did.
    video_url = self.URL_PIRFIX + self.getSinavideoUrl(filepath + playlist_file)
    common.download(video_url, filepath, name + '.mp4')
def getVideoByUrl(url):
    """Resolve a Sina video page URL through the iPad player endpoint.

    The page URL is appended (unquoted) to the ``play.php?url=`` endpoint
    and the ``src`` of the first ``<source>`` tag in the response is
    returned.

    Returns ``None`` when the endpoint gives no content.  Previously the
    parameter was reused as the result variable, so a failed fetch could
    hand the input page URL back as if it were the media URL.
    """
    # e.g. http://dp.sina.cn/dpool/video/pad/play.php?url=http://video.sina.com.cn/p/news/c/v/2014-09-03/214064108827.html
    ipad_url = r'http://dp.sina.cn/dpool/video/pad/play.php?url='
    content = getHtml(ipad_url + url)
    if not content:
        return None
    return r1(r'<source.*?src="(.*?)"', content)
def getImage(imgurl, lock=fileLock):
    """Download the image at *imgurl* and record it as downloaded.

    The content is stored through ``saveasImage`` under the name
    ``md5(imgurl) + '.jpg'``; the (url, name) pair is then registered with
    ``savetoDownloaded`` while *lock* is held.  Prints a failure message and
    returns without saving when nothing could be fetched.
    """
    content = getHtml(imgurl)
    if not content:
        # Parenthesised form prints the same text on Python 2 and 3.
        print('getImage failure')
        return

    imgname = md5(imgurl) + r'.jpg'
    saveasImage(imgname, content)

    lock.acquire()
    try:
        savetoDownloaded(imgurl, imgname)
    finally:
        # Always release: an exception in savetoDownloaded used to leave the
        # lock held and block every later caller.
        lock.release()
def getVideoInfoByUrl(url):
    """Build the video URL for a kankanews article from its vxml document.

    Page URLs look like
    ``http://domestic.kankanews.com/c/2014-08-04/0015274473.shtml`` and map
    to ``http://www.kankanews.com/vxml/2014-08-04/0015274473.xml``.

    Returns the text of the document's second child followed by the text of
    its first child (with ``h264_1500k_mp4`` replaced by ``h264_450k_mp4``),
    or ``None`` when the URL has no date/id part or the XML is unavailable.
    """
    part1 = r1(r'(/\d{4}-\d{2}-\d{2}/\w*?)\.', url)
    if not part1:
        # NOTE(review): r1 presumably yields None on a miss; formatting that
        # into the URL would request ".../vxmlNone.xml".
        return None

    content = getHtml(r'http://www.kankanews.com/vxml%s.xml' % part1)
    if not content:
        return None

    root = ET.fromstring(content)
    # Just a patch: swap the 1500k rendition name for the 450k one.
    resolution = root[0].text.replace('h264_1500k_mp4', 'h264_450k_mp4')
    return root[1].text + resolution
def getVideoByUrl(url):
    """Return the video URL embedded in the page at *url*.

    A ``<video ... src='...'>`` tag is preferred.  Failing that, an embed
    carrying a ``data-vid`` attribute whose ``src`` mentions ``ku6`` is
    resolved through ``getKu6VideoByVid``.  Returns ``None`` when the page
    cannot be fetched, or the falsy regex result when nothing is found.
    """
    content = getHtml(url)
    if not content:
        return None

    videoUrl = r1(r"<video.*?src='(.*?)'", content)
    if not videoUrl:
        sourceWeb = r1(r'src="(.*?)" data-vid', content)
        dataVid = r1(r'data-vid="(.*?)"', content)
        # sourceWeb is checked first: when the embed is absent it is not a
        # string and the ``in`` test used to raise TypeError.
        if sourceWeb and dataVid and 'ku6' in sourceWeb:
            videoUrl = getKu6VideoByVid(dataVid)
    return videoUrl
def getVideoByUrl(url):
    """Resolve an ifeng video page URL to its play URL.

    Page URLs look like
    ``http://v.ifeng.com/news/world/201408/015041f2-2979-9982-9fb1-950a9390ac64.shtml``;
    the matching info document lives at
    ``http://v.ifeng.com/video_info_new/<c>/<cc>/<id>.xml`` where ``<cc>``
    is the last two characters of the id and ``<c>`` the first of those two.

    Returns the ``VideoPlayUrl`` attribute of the document's first child,
    or ``None`` when the id cannot be extracted or the XML is unavailable.
    """
    vInfo_url_prefix = r"http://v.ifeng.com/video_info_new/"

    d1 = r1(r".*/(.*?)\.", url)
    if not d1:
        # A missing or empty id used to crash in len()/indexing below.
        return None

    # tail[0] equals d1[len(d1) - 2] for every non-empty id, including a
    # one-character id, so the generated path is unchanged.
    tail = d1[-2:]
    vInfo_url = vInfo_url_prefix + tail[0] + r"/" + tail + r"/" + d1 + r".xml"

    content = getHtml(vInfo_url)
    if not content:
        return None
    root = ET.fromstring(content)
    return root[0].attrib.get("VideoPlayUrl")
def download(self, url, filepath):
    """Download the audio behind *url* into *filepath* as ``<title>.m4a``.

    The JSON endpoint derived from *url* is fetched and handed to
    ``getVoiceUrl``, which supplies the title and the audio URL.
    """
    payload = common.getHtml(self.URL_PRIFIX + self.getJsonUrl(url))
    title, audio_url = self.getVoiceUrl(payload)
    common.download(audio_url, filepath, title + '.m4a')
def getKey(allot_url):
    """Fetch *allot_url* and return ``(prefix, key)`` from its reply.

    The reply is split on ``|``; the first field is the prefix and the
    fourth field is the key.  Errors from an empty or short reply propagate
    to the caller.
    """
    fields = getHtml(allot_url).split('|')
    return fields[0], fields[3]