Beispiel #1
0
def getVideoByUrl(url):
    """Resolve the clip URLs of the video embedded in a Sohu TV page.

    The page at *url* is fetched, the video id is extracted from its
    ``share.vrs.sohu.com/<vid>/`` link and the ``vrs_flash.action`` index
    for that id is queried. The stream qualities are then tried in the
    order norVid, highVid, superVid, oriVid.

    Returns:
        The list of clip URLs of the first quality that resolves to at
        least one clip, an empty list when no quality resolves, or None
        when the page / stream index cannot be fetched or the page carries
        no video id.
    """
    # url is like: http://tv.sohu.com/20140904/n404054190.shtml
    html = getHtml(url)
    if not html:
        return
    vid = r1(r'share.vrs.sohu.com/(.*?)/', html)
    if not vid:
        # No video id on the page; concatenating it below would fail.
        return
    content = getHtml("http://hot.vrs.sohu.com/vrs_flash.action?vid=" + vid)
    if not content:
        return
    jsonContent = json.loads(content)['data']
    for streamType in ("norVid", "highVid", "superVid", "oriVid"):
        try:
            allot, prot, clipsURL, su = getPram(jsonContent[streamType])
            if len(clipsURL) != len(su):
                # Clip list and "su" list must pair up one-to-one.
                continue
            video = []
            for clip, part in zip(clipsURL, su):
                allot_url = "http://%s/?prot=%s&file=%s&new=%s" % (allot, prot, clip, part)
                prefix, key = getKey(allot_url)
                # prefix[0:-1] drops the last character of the prefix
                # before the "su" path is appended.
                video.append("%s%s?key=%s" % (prefix[0:-1], part, key))
        except Exception:
            # Best effort: a quality that is missing or fails to resolve
            # must not prevent the next one from being tried. Unlike a
            # bare "except", this lets KeyboardInterrupt/SystemExit through.
            continue
        if video:
            # Only the first resolvable quality is ever returned, so stop
            # here instead of resolving the remaining ones over the network.
            return video
    return []
Beispiel #2
0
def getVideoByUrl(url):
    """Find the media URL referenced by the page at *url*.

    The page is searched first for a ``<source src="...">`` tag (video),
    then for an ``audio src="..."`` attribute. When neither is present the
    lookup falls back to ``getVideoInfoByUrl(url)``.

    Returns:
        The first media URL found, the fallback's result, or None when the
        page could not be fetched.
    """
    page = getHtml(url)
    if not page:
        return None

    # Video first, then audio; the generic lookup is tried last.
    for pattern in (r'<source src="(.*?)"', r'audio src="(.*?)"'):
        found = r1(pattern, page)
        if found:
            return found
    return getVideoInfoByUrl(url)
Beispiel #3
0
def getVideoByVid(vid):
    """Look up the video URL for a vv.video.qq.com video id.

    Queries the ``geturl`` endpoint (XML output) for *vid* and extracts the
    text of the first ``<url>`` element from the response.

    Returns:
        Whatever ``r1`` extracts from the response, or None when the
        endpoint returned no content.
    """
    response = getHtml(
        'http://vv.video.qq.com/geturl?otype=xml&platform=1&vid=%s&format=2' % vid
    )
    if not response:
        return None
    return r1(r'<url>(.*?)</url>', response)
Beispiel #4
0
def getPram(streamType_id):
    """Fetch the stream parameters for one Sohu stream id.

    Requests the ``vrs_flash.action`` JSON for *streamType_id* and returns
    the tuple ``(allot, prot, clipsURL, su)`` read from it, where
    ``clipsURL`` and ``su`` come from the nested ``data`` object.

    No error handling is done here: a failed fetch, invalid JSON or a
    missing key raises to the caller.
    """
    info = json.loads(
        getHtml("http://hot.vrs.sohu.com/vrs_flash.action?vid=" + str(streamType_id))
    )
    return info['allot'], info['prot'], info['data']["clipsURL"], info['data']["su"]
Beispiel #5
0
	def download(self,url,filepath):
		"""Download the video behind *url* into *filepath*.

		First the page's m3u8 playlist is saved as ``<name>.m3u8``, then the
		video address derived from that saved playlist (prefixed with
		``self.URL_PIRFIX``) is saved as ``<name>.mp4``.

		NOTE(review): the saved playlist is located with plain string
		concatenation ``filepath+name+'.m3u8'``, so *filepath* presumably
		has to end with a path separator -- confirm against callers.
		"""
		# The name is used for both output files.
		name = self.getName(url)
		playlist = self.getM3u8(common.getHtml(url))
		common.download(urllib.unquote(playlist),filepath,name + '.m3u8')
		video_url = self.URL_PIRFIX + self.getSinavideoUrl(filepath+name+'.m3u8')
		common.download(video_url,filepath,name+'.mp4')
Beispiel #6
0
def getVideoByUrl(url):
    """Resolve a Sina video page to its media URL via the pad player page.

    *url* is appended as-is (not URL-quoted) to the ``dp.sina.cn`` pad
    player address, e.g.
    http://dp.sina.cn/dpool/video/pad/play.php?url=http://video.sina.com.cn/p/news/c/v/2014-09-03/214064108827.html
    and the ``src`` of the first ``<source>`` tag in that page is returned.

    Returns:
        Whatever ``r1`` extracts, or None when the player page could not be
        fetched.
    """
    player_page = getHtml(r'http://dp.sina.cn/dpool/video/pad/play.php?url=' + url)
    if not player_page:
        return None
    return r1(r'<source.*?src="(.*?)"', player_page)
Beispiel #7
0
def getImage(imgurl,lock=fileLock):
    """Download one image and record it as downloaded.

    The content fetched from *imgurl* is stored via ``saveasImage`` under
    the name ``md5(imgurl) + '.jpg'``; the (url, name) pair is then
    recorded with ``savetoDownloaded`` while *lock* is held.

    Args:
        imgurl: address of the image to fetch.
        lock: object with ``acquire()``/``release()`` guarding the
            ``savetoDownloaded`` call; defaults to the module-level
            ``fileLock``.

    Returns:
        None. On a failed fetch a message is printed and nothing is saved.
    """
    content = getHtml(imgurl)
    if not content:
        # Call form prints the same text on Python 2 and Python 3.
        print('getImage failure')
        return
    imgname = md5(imgurl) + r'.jpg'
    saveasImage(imgname, content)
    lock.acquire()
    try:
        savetoDownloaded(imgurl, imgname)
    finally:
        # Always release, otherwise an exception while recording would
        # leave the lock held and block every other caller.
        lock.release()
Beispiel #8
0
def getVideoInfoByUrl(url):
    """Resolve a kankanews.com article URL to its video URL.

    The ``/YYYY-MM-DD/<id>`` part of *url* is mapped onto the site's XML
    descriptor, e.g.
        http://domestic.kankanews.com/c/2014-08-04/0015274473.shtml
        -> http://www.kankanews.com/vxml/2014-08-04/0015274473.xml
    The video URL is the text of the descriptor's second child followed by
    the text of its first child.

    Returns:
        The video URL, or None when *url* has no date/id part or the
        descriptor could not be fetched.
    """
    part1 = r1(r'(/\d{4}-\d{2}-\d{2}/\w*?)\.', url)
    if not part1:
        # Not a date/id style URL: without this guard a bogus
        # ".../vxmlNone.xml" address would be requested.
        return None
    xml_url = r'http://www.kankanews.com/vxml%s.xml' % part1
    content = getHtml(xml_url)
    videoUrl = None
    if content:
        root = ET.fromstring(content)
        # Patch kept from the original: the 1500k variant named in the
        # descriptor is replaced with the 450k one.
        resolution = root[0].text.replace('h264_1500k_mp4', 'h264_450k_mp4')
        videoUrl = root[1].text + resolution
    return videoUrl
Beispiel #9
0
def getVideoByUrl(url):
    """Find the video URL referenced by the page at *url*.

    A ``<video ... src='...'>`` tag is preferred. Otherwise, when the page
    embeds a player whose ``src`` contains ``ku6`` and that carries a
    ``data-vid`` attribute, the video is resolved through
    ``getKu6VideoByVid``.

    Returns:
        The video URL, or a falsy value (None when the page could not be
        fetched) when nothing was found.
    """
    content = getHtml(url)
    if not content:
        return None

    videoUrl = r1(r"<video.*?src='(.*?)'", content)
    if videoUrl:
        return videoUrl

    sourceWeb = r1(r'src="(.*?)" data-vid', content)
    dataVid = r1(r'data-vid="(.*?)"', content)
    # Check sourceWeb before the substring test: when the pattern does not
    # match there is no string to search and "'ku6' in sourceWeb" would
    # raise instead of reporting "no video".
    if sourceWeb and dataVid and 'ku6' in sourceWeb:
        return getKu6VideoByVid(dataVid)
    return videoUrl
Beispiel #10
0
def getVideoByUrl(url):
    """Resolve a v.ifeng.com page URL to its ``VideoPlayUrl``.

    The last path component of *url* without its extension (the video id)
    selects an XML descriptor under ``video_info_new``; the descriptor's
    path is built from the id's second-to-last character, its last two
    characters and the id itself, e.g.
        http://v.ifeng.com/news/world/201408/015041f2-2979-9982-9fb1-950a9390ac64.shtml
        -> http://v.ifeng.com/video_info_new/6/64/015041f2-2979-9982-9fb1-950a9390ac64.xml

    Returns:
        The ``VideoPlayUrl`` attribute of the descriptor's first child, or
        None when the id cannot be derived from *url*, the descriptor
        cannot be fetched, or the attribute is absent.
    """
    vInfo_url_prefix = r"http://v.ifeng.com/video_info_new/"
    d1 = r1(r".*/(.*?)\.", url)
    if not d1 or len(d1) < 2:
        # No usable id: the directory parts below need at least two
        # characters, and a non-matching URL yields nothing to slice.
        return None
    vInfo_url = vInfo_url_prefix + d1[-2] + r"/" + d1[-2:] + r"/" + d1 + r".xml"
    content = getHtml(vInfo_url)
    if not content:
        return None
    root = ET.fromstring(content)
    return root[0].attrib.get("VideoPlayUrl")
Beispiel #11
0
 def download(self, url, filepath):
     """Download the audio track behind *url* into *filepath*.

     The JSON address derived from *url* (prefixed with
     ``self.URL_PRIFIX``) is fetched, the title and audio URL are taken
     from it via ``self.getVoiceUrl``, and the audio is saved as
     ``<title>.m4a``.
     """
     page = common.getHtml(self.URL_PRIFIX + self.getJsonUrl(url))
     title, audio_url = self.getVoiceUrl(page)
     common.download(audio_url, filepath, title + '.m4a')
Beispiel #12
0
def getKey(allot_url):
    """Fetch *allot_url* and return ``(prefix, key)`` from its reply.

    The reply is split on ``|``; the first field is returned as the prefix
    and the fourth field as the key. No error handling is done here: a
    failed fetch or a reply with fewer than four fields raises to the
    caller.
    """
    fields = getHtml(allot_url).split('|')
    return fields[0], fields[3]