Ejemplo n.º 1
0
def getVideoByVid(vid):
    """Resolve a video id to a media URL through the vv.video.qq.com API.

    The ``geturl`` endpoint is queried for ``vid`` (interpolated into the
    query string as-is) and the text captured between ``<url>`` and
    ``</url>`` in the response is extracted with the ``r1`` helper.

    Returns ``None`` when ``getHtml`` yields nothing; otherwise returns
    whatever ``r1`` produces for the pattern (presumably ``None`` when the
    response carries no ``<url>`` element -- confirm against ``r1``).
    """
    query = 'http://vv.video.qq.com/geturl?otype=xml&platform=1&vid=%s&format=2' % vid
    response = getHtml(query)
    if not response:
        return None
    return r1(r'<url>(.*?)</url>', response)
Ejemplo n.º 2
0
def getVideoByUrl(url):
    """Fetch ``url`` and pull the ``videoUrl`` value out of a ``<param>`` tag.

    The page is downloaded with ``getHtml``. If nothing comes back the
    function returns ``None``; otherwise it returns the result of ``r1`` for
    a pattern that captures the text after ``videoUrl=`` up to the next
    double quote inside a ``<param ...>`` element.
    """
    page = getHtml(url)
    if not page:
        return None
    # Lazy capture: stops at the first '"' following "videoUrl=".
    return r1(r'<param.*?videoUrl=(.*?)"', page)
Ejemplo n.º 3
0
def _sohu_clip_urls(info, vid):
    """Resolve every clip described by a Sohu ``info`` payload via ``real_url``.

    ``info`` is the decoded JSON response; ``vid`` is the id forwarded to
    ``real_url`` for each clip. Returns the list of resolved URLs in clip
    order.
    """
    host = info['allot']
    tvid = info['tvid']
    data = info['data']
    assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
    # NOTE(review): data['ck'] is not part of the length check above; zip()
    # silently truncates to the shortest of the three lists.
    return [
        real_url(host, vid, tvid, new, urlparse(clip).path, ck)
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck'])
    ]


def getVideoByUrl(url):
    """Return the list of real clip URLs for a Sohu video page.

    The video id is read from the ``id=`` query parameter for
    ``share.vrs.sohu.com`` links, otherwise it is scraped from the page
    source. For ``tv.sohu.com`` pages the ``hot.vrs.sohu.com`` metadata is
    consulted and the first non-zero alternate id (checked in the order
    oriVid, superVid, highVid, norVid, relativeId) replaces the original one;
    all other pages use the ``my.tv.sohu.com`` metadata endpoint directly.

    Raises ``AssertionError`` when no video id can be found or when the clip
    lists in the metadata have mismatching lengths.
    """
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = r1(r'id=(\d+)', url)
    else:
        html = getHtml(url)
        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
    assert vid

    # Id that is finally sent to real_url(); stays at the page's own id unless
    # an alternate quality id is selected below. (Previously the last value
    # inspected by the loop -- typically 0 -- was used when nothing matched.)
    play_vid = vid
    if re.match(r'http://tv.sohu.com/', url):
        info = json.loads(getHtml('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
        for qtyp in ["oriVid", "superVid", "highVid", "norVid", "relativeId"]:
            hqvid = info['data'][qtyp]
            # NOTE(review): vid is regex-captured text while hqvid comes from
            # JSON, so this inequality may hold even for the same id -- the
            # comparison is kept as in the original; confirm intent.
            if hqvid != 0 and hqvid != vid:
                info = json.loads(getHtml('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
                play_vid = hqvid
                break
    else:
        info = json.loads(getHtml('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))

    return _sohu_clip_urls(info, play_vid)
Ejemplo n.º 4
0
def getHtmlInfo():
    """Scrape the huxiu.com front page into a list of article dicts.

    Each returned dict has the keys ``url``, ``title``, ``newsid``,
    ``summary``, ``description`` (same text as ``summary``), ``thumb``,
    ``keywords`` (always empty), ``ctime`` (epoch seconds), ``author`` and
    ``source`` (the module-level ``ctable`` value).

    Article blocks without a header, an ``<h3>`` or a usable link are
    skipped. Optional pieces (summary, thumbnail, publication time, author)
    fall back to an empty string -- or to the current time for ``ctime`` --
    instead of aborting the whole scrape.

    Returns an empty list when the page could not be fetched.
    """
    url = r"http://www.huxiu.com"
    content = getHtml(url)
    newsList = []
    if not content:
        return newsList

    # Relative-age patterns (minutes / hours / days ago) paired with the
    # number of seconds per unit; the first pattern that matches wins.
    relative_units = (
        (u"(\\d{1,2})分钟前", 60),
        (u"(\\d{1,2})小时前", 3600),
        (u"(\\d{1,2})天前", 24 * 3600),
    )

    soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
    itemList = soup.find_all("div", {"class": "mod-b mod-art "})
    itemList += soup.find_all("div", {"class": "mod-b mod-art mod-b-push"})
    for item in itemList:
        head = item.find("", {"class": "mob-ctt"})
        if not head:
            continue
        heading = head.find("h3")
        if not heading:
            continue
        link = heading.find("a")
        href = link.get("href") if link is not None else None
        if not href:
            # No anchor or no href: there is nothing to link to, skip it.
            continue

        nInfo = {}
        nInfo["url"] = url + href
        nInfo["title"] = link.getText()
        nInfo["newsid"] = getMd5(nInfo["url"])

        summary_div = item.find("div", {"class": "mob-sub"})
        nInfo["summary"] = summary_div.getText() if summary_div is not None else ""
        nInfo["description"] = nInfo["summary"]

        thumb_img = item.find("img", {"class": "lazy"})
        nInfo["thumb"] = thumb_img.get("data-original") if thumb_img is not None else ""
        nInfo["keywords"] = ""

        # Convert the relative age shown on the page into an absolute
        # timestamp; an absent or unrecognised label means "now".
        time_span = head.find("span", {"class": "time"})
        timeStr = time_span.getText() if time_span is not None else ""
        timeSec = time.time()
        for pattern, unit_seconds in relative_units:
            amount = r1(pattern, timeStr)
            if amount:
                timeSec -= unit_seconds * int(amount)
                break
        nInfo["ctime"] = timeSec

        nInfo["author"] = ""
        author_div = item.find("div", {"class": "mob-author"})
        if author_div:
            author_span = author_div.find("span", {"class": "author-name "})
            nInfo["author"] = author_span.getText() if author_span else ""

        nInfo["source"] = ctable
        newsList.append(nInfo)
    return newsList
Ejemplo n.º 5
0
def getVideoByUrl(url):
    """Extract a video URL from the page at ``url``.

    The page is searched for a ``<video ... src='...'>`` tag first. When that
    yields nothing, the function looks for an embed carrying a ``data-vid``
    attribute; if the embed's ``src`` mentions ``ku6`` the id is resolved
    with ``getKu6VideoByVid``.

    Returns ``None`` when the page cannot be fetched, and the (falsy) result
    of the ``<video>`` lookup when neither strategy finds a video.
    """
    content = getHtml(url)
    if not content:
        return None

    videoUrl = r1(r"<video.*?src='(.*?)'", content)
    if videoUrl:
        return videoUrl

    sourceWeb = r1(r'src="(.*?)" data-vid', content)
    dataVid = r1(r'data-vid="(.*?)"', content)
    # sourceWeb must be checked before the substring test: when the page has
    # no such embed the lookup yields no string and `"ku6" in sourceWeb`
    # would raise TypeError (assuming r1 returns None on no match).
    if sourceWeb and dataVid and "ku6" in sourceWeb:
        return getKu6VideoByVid(dataVid)
    return videoUrl
Ejemplo n.º 6
0
def getVideoByUrl(url):
    """Resolve an ifeng video page URL to its ``VideoPlayUrl``.

    ``url`` looks like
    ``http://v.ifeng.com/news/world/201408/015041f2-2979-9982-9fb1-950a9390ac64.shtml``.
    The id captured between a ``/`` and the following ``.`` selects the
    metadata document, e.g.
    ``http://v.ifeng.com/video_info_new/4/48/01de5902-0b5a-00f1-5154-47d50dda0448.xml``
    (second-to-last id character, last two id characters, full id).

    Returns the ``VideoPlayUrl`` attribute of the first child of the XML
    root, or ``None`` when the id cannot be extracted, the metadata cannot be
    fetched, the document has no child element, or the attribute is absent.
    """
    vInfo_url_prefix = r'http://v.ifeng.com/video_info_new/'
    d1 = r1(r'.*/(.*?)\.', url)
    if not d1:
        # No usable id in the URL: previously this crashed on len()/indexing.
        return None

    # tail[0] is the second-to-last character of the id (or the only
    # character for a one-character id, matching the original indexing).
    tail = d1[-2:]
    vInfo_url = vInfo_url_prefix + tail[0] + r'/' + tail + r'/' + d1 + r'.xml'

    content = getHtml(vInfo_url)
    if not content:
        return None

    # ET is presumably xml.etree.ElementTree -- imported elsewhere in the file.
    root = ET.fromstring(content)
    if len(root) == 0:
        # Well-formed document without any child element to read from.
        return None
    return root[0].attrib.get('VideoPlayUrl')
Ejemplo n.º 7
0
def real_url(host,vid,tvid,new,clipURL,ck):
    """Ask ``host`` for the final download URL of a single clip.

    Builds the request from ``clipURL`` (``file``), ``new``, ``ck`` (``key``)
    and ``vid``, adds a millisecond timestamp as ``uid`` and a random float
    as ``t``, fetches it with ``getHtml`` and returns the ``url`` field of
    the JSON reply. ``tvid`` is accepted but not used.
    """
    request = 'http://' + host + '/?prot=9&prod=flash&pt=1&file=' + clipURL
    request += '&new=' + new
    request += '&key=' + ck
    request += '&vid=' + str(vid)
    request += '&uid=' + str(int(time.time() * 1000))
    request += '&t=' + str(random.random())
    reply = json.loads(getHtml(request))
    return reply['url']