def video_from_url(self, url, **kwargs):
    """Resolve a Tudou page URL to a download.

    Order of resolution: embedded-player id in the URL, then a Youku vcode
    embedded in the page (delegated to Youku), then the page's iid; with no
    iid the URL is treated as a playlist.
    """
    # Embedded player
    id = r1(r'.tudou.com/v/([^/]+)/', url)
    if id:
        return self.tudou_download_by_id(id, title='')
    html = get_html(url)
    try:
        # r1 returns None on no match; .replace then raises AttributeError
        title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
        assert title
        title = unescape_html(title)
    except AttributeError:
        # fallback: page subtitle attribute, or empty title
        title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"')
        if title is None:
            title = ''
    vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
    if vcode is None:
        vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"')
    if vcode:
        # Tudou page actually hosts a Youku video -- hand off
        print "vcode", vcode
        from youku import Youku
        return Youku().video_from_vid(vcode, **kwargs)
    iid = r1(r'iid\s*[:=]\s*(\d+)', html)
    if not iid:
        return self.tudou_download_playlist(url, **kwargs)
    else:
        return self.tudou_download_by_iid(iid, title, **kwargs)
def get_m1905_vid(html):
    """Return the 1905.com video id, taken from a play-page URL directly or
    scraped from the fetched page content otherwise."""
    play_url_re = r'http://www.1905.com/vod/play/(.*).shtml.*'
    if re.match(play_url_re, html):
        return r1(play_url_re, html)
    page = get_content(html)
    return r1(r'vid : "(.*)",', page)
def get_qq_vid(url):
    """Extract the QQ video id from the URL's vid parameter, or from the page."""
    if re.match(r'http://v.qq.com/([^\?]+)\?vid', url):
        return r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
    page = get_content(url)
    return r1(r'vid:"(.*)"', page)
def get_funshion_vid(rurl):
    """Return the fun.tv media id from the URL or, failing that, from the page.

    Bug fix: the fallback branch fetched the undefined name ``url`` instead of
    the ``rurl`` parameter, raising NameError for URLs without an ``m-<id>``
    segment.
    """
    pattern = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(pattern, rurl):
        vid = r1(pattern, rurl)
    else:
        html = get_content(rurl)  # was get_content(url) -- NameError
        vid = r1(r'\"mediaid\":(\d+)', html)
    return vid
def get_m1905_vid(html):
    """Return the 1905.com vid: from a play-page URL if one was passed,
    otherwise scraped out of the fetched page."""
    url_pattern = r"http://www.1905.com/vod/play/(.*).shtml.*"
    if re.match(url_pattern, html):
        vid = r1(url_pattern, html)
    else:
        vid = r1(r'vid : "(.*)",', get_content(html))
    return vid
def get_pps_vid(html):
    """Return the PPS play id from a play_*.html URL, else from page content."""
    play_re = r'http://v.pps.tv/play_(.*).html'
    if re.match(play_re, html):
        return r1(play_re, html)
    return r1(r'url_key: "(.*)",', get_content(html))
def get_pps_vid(html):
    """Return the PPS video id: matched out of a play URL, or the url_key
    scraped from the fetched page."""
    matcher = r'http://v.pps.tv/play_(.*).html'
    if re.match(matcher, html):
        vid = r1(matcher, html)
    else:
        page = get_content(html)
        vid = r1(r'url_key: "(.*)",', page)
    return vid
def getVideoByUrl(url):
    """Scrape a direct media URL from a page.

    Tries, in order: a <source src> video tag, an audio src tag, and finally
    the generic getVideoInfoByUrl fallback.  Returns None when the page could
    not be fetched.
    """
    content = getHtml(url)
    videoUrl = None
    if content:
        # video first -- the common case
        videoUrl = r1(r'<source src="(.*?)"', content)
        if not videoUrl:
            # audio
            videoUrl = r1(r'audio src="(.*?)"', content)
        if not videoUrl:
            # any remaining type
            videoUrl = getVideoInfoByUrl(url)
    return videoUrl
def playfound(url, title, pic):
    """Build list items for an iqiyi page.

    If the page exposes tvid/vid params, return one playable item for it;
    otherwise fall back to listing the album's first episode page.  Non-http
    URLs yield an empty list.
    """
    items = []
    if not url.startswith('http'):
        return []
    link = get_html(url)
    tvId = r1(r'param\[\'tvid\'\]\s*=\s*"(.+)"', link)
    vid = r1(r'param\[\'vid\'\]\s*=\s*"(.+)"', link)
    if tvId is not None and vid is not None:
        items = [{
            'label': title,
            'path': url_for('playvideo', tvId=tvId, vid=vid, title=title, pic=pic),
            'is_playable': True,
            'info': {
                'title': title
            }
        }]
    else:
        # no direct ids -- treat the page as an album
        albumId = r1('albumid="(.+?)"', link)
        if albumId is not None:
            items = episodelist(albumId, 1)
    return items
def get_funshion_vid(rurl):
    """Return the fun.tv media id, from the URL's m-<id> part or from the page.

    Bug fix: the else branch called ``get_content(url)`` but the parameter is
    named ``rurl``; the undefined ``url`` raised NameError whenever the URL
    lacked an ``m-<id>`` segment.
    """
    m_pattern = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(m_pattern, rurl):
        return r1(m_pattern, rurl)
    html = get_content(rurl)  # fixed: was the undefined name `url`
    return r1(r'\"mediaid\":(\d+)', html)
def get_cntv_pid(html):
    """Derive the CNTV pid from a tv.cntv.cn or xiyou.cntv.cn URL.

    Raises NotImplementedError for any other URL shape.
    """
    tv_pattern = r'http://tv.cntv.cn/.*/(\w+)'
    xiyou_pattern = r'http://xiyou.cntv.cn/v-([\w-]+)\.html'
    if re.match(tv_pattern, html):
        return r1(tv_pattern, html)
    if re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', html):
        return r1(xiyou_pattern, html)
    raise NotImplementedError(html)
def video_from_url(self, url, **kwargs):
    """Resolve a Sohu URL to its numeric vid and delegate to video_from_vid.

    share.vrs.sohu.com links carry the vid as a query parameter; any other
    URL is fetched and the vid scraped from the page.
    Fix: the ``id=(\\d+)`` pattern is now a raw string -- as a plain string the
    ``\\d`` escape is invalid and rejected by modern Python.
    """
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = r1(r'id=(\d+)', url)
    else:
        html = get_html(url)
        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
    assert vid
    return self.video_from_vid(vid, **kwargs)
def tudou_download_by_id(self, id, title, **kwargs):
    """Fetch a Tudou programme page by id, pull its iid and title, and
    download via tudou_download_by_iid.  A missing title becomes ''."""
    page = get_html('http://www.tudou.com/programs/view/%s/' % id)
    iid = r1(r'iid\s*[:=]\s*(\S+)', page)
    raw_title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', page)
    title = raw_title.replace("\\'", "\'") if raw_title is not None else ''
    return self.tudou_download_by_iid(iid, title, **kwargs)
def get_funshion_playnum(rurl): playNum = r1('http://www.fun.tv/vplay/.*m-\d+.e-(\d+)', rurl) print 'playNum', playNum if playNum == None: html = get_content(rurl) playNum = r1("minfo.playNumber = \'(\d+)\';", html) if playNum == None: playNum = 1 print 'playNum2', playNum return playNum
def albumlist(url):
    """Dispatch a fun.tv /vplay URL: play a single video (v-<id>) or list a
    series (g-<id>); anything else yields an empty list.

    Fix: the patterns used ``http?`` (an optional final "p"), which matches
    "htt"/"http" but never https URLs; ``https?`` matches both schemes.
    Also made the patterns raw strings.
    """
    plugin.set_content('TVShows')
    vid = r1(r'https?://www.fun.tv/vplay/v-(\w+)', url)
    epid = r1(r'https?://www.fun.tv/vplay/.*g-(\w+)', url)
    if vid:
        return singleVideo(url)   # play single video
    elif epid:
        return seriesList(url)    # list series
    else:
        return []
def get_funshion_playnum(rurl): playNum = r1('http://www.fun.tv/vplay/.*m-\d+.e-(\d+)',rurl) print 'playNum',playNum if playNum == None: html = get_content(rurl) playNum = r1("minfo.playNumber = \'(\d+)\';",html) if playNum == None: playNum = 1 print 'playNum2',playNum return playNum
def video_from_url(self, url, **kwargs):
    """Resolve a fun.tv play URL to a media vid and delegate to video_from_vid.

    A direct ``v-<id>`` URL plays as a single video; otherwise the id is
    searched anywhere in the path, and as a last resort the album (``g-<id>``)
    is asked for its first episode via the funshion episode API.
    Fix: patterns used ``http?`` (optional final "p"), which never matched
    https URLs; ``https?`` matches both schemes.
    """
    vid = r1(r'https?://www.fun.tv/vplay/v-(\w+)', url)
    if vid:
        return self.video_from_vid(vid, single_video=True, **kwargs)
    else:
        vid = r1(r'https?://www.fun.tv/vplay/.*v-(\w+)', url)
        if not vid:
            epid = r1(r'https?://www.fun.tv/vplay/.*g-(\w+)', url)
            url = 'http://pm.funshion.com/v5/media/episode?id={}&cl=mweb&uc=111'.format(epid)
            html = get_html(url)
            meta = loads(html)
            vid = meta['episodes'][0]['id']
        return self.video_from_vid(vid, **kwargs)
def vid_from_url(self, url, **kwargs):
    """Return (tvId, videoId) for an iqiyi URL, or None when either is missing.

    Each id is tried against the URL itself first (curid fragment, query
    parameter), then against several patterns in the fetched page; the first
    match wins.
    """
    link = get_html(url)
    tvId = r1(r'#curid=(.+)_', url) or \
        r1(r'tvid=([^&]+)', url) or \
        r1(r'data-player-tvid="([^"]+)"', link) or \
        r1(r'tv(?:i|I)d=(.+?)\&', link) or \
        r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', link)
    videoId = r1(r'#curid=.+_(.*)$', url) or \
        r1(r'vid=([^&]+)', url) or \
        r1(r'data-player-videoid="([^"]+)"', link) or \
        r1(r'vid=(.+?)\&', link) or \
        r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', link)
    # implicit None when either id could not be found
    if tvId is not None and videoId is not None:
        return tvId, videoId
def get_kankan_mparam(gcid, param):
    """Query the kankan CDN resolver for a gcid and return one field.

    ``param`` selects 'url', 'param1' or 'param2'; anything else falls back
    to the URL.
    """
    info = get_content('http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'.format(gcid))
    ip = r1(r'ip:"(.*?)"', info)
    path = r1(r'path:"(.*?)"', info)
    fields = {
        'url': 'http://' + ip + '/' + path,
        'param1': r1(r'param1:(.*),', info),
        'param2': r1(r'param2:(.*)}', info),
    }
    return fields.get(param, fields['url'])
def relatedList(url):
    """Build menu items for videos related to a fun.tv media/video URL.

    Queries the funshion relate API with the album id (g-<id>) or video id
    (v-<id>) and emits a banner per relate group followed by its contents.
    Fixes: ``info['tltle']`` typo -> ``info['title']`` (titles were silently
    dropped); ``http?`` in the id patterns never matched https -> ``https?``.
    """
    epid = r1(r'https?://www.fun.tv/vplay/.*g-(\w+)', url)
    if not epid:
        epid = r1(r'https?://www.fun.tv/vplay/v-(\w+)', url)
    rel_api = 'http://pm.funshion.com/v6/media/relate?id=%s'
    link = get_html(rel_api % epid)
    jsdata = loads(link)
    relates = jsdata['relates']
    items = []
    for x in relates:
        # group banner
        items.append({
            'label': BANNER_FMT % x['name'],
            'path': url_for('stay')
        })
        for y in x['contents']:
            pic = y['poster'] if y['poster'] else y['still']
            info = {}
            dur = y.get('duration')
            if dur:
                # "hh:mm:ss" / "mm:ss" -> seconds
                duration = 0
                for t in dur.split(':'):
                    duration = duration * 60 + int(t)
                info['duration'] = duration
            info['title'] = y['name']  # was info['tltle'] -- typo
            info['plot'] = y['aword']
            if y['template'] == 'vplay':
                items.append({
                    'label': y['name'],
                    'path': url_for('playvideo', url=HOST_URL + '/vplay/v-' + y['id']),
                    'thumbnail': pic,
                    'is_playable': True,
                    'info': info
                })
            else:
                items.append({
                    'label': y['name'],
                    'path': url_for('albumlist', url=HOST_URL + '/vplay/g-' + y['id']),
                    'thumbnail': pic,
                    'info': info
                })
    return items
def video_from_url(self, url, **kwargs):
    """Resolve an AcFun page to its vid and a decorated title, then delegate
    to video_from_vid.

    The title is suffixed with the current part label for multi-P URLs and
    decorated with the uploader name and part title from the page.
    Fix: patterns containing ``\\d``/``\\D`` escapes are now raw strings --
    plain strings with those escapes are rejected by modern Python.
    """
    assert re.match(r'http://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url)
    html = get_html(url)
    title = r1(r'data-title="([^"]+)"', html)
    assert title
    if match1(url, r'_(\d+)$'):  # current P
        title = title + " " + r1(r'active">([^<]*)', html)
    vid = r1(r'data-vid="(\d+)"', html)
    up = r1(r'data-name="([^"]+)"', html)
    p_title = r1(r'active">([^<]+)', html)
    title = '%s (%s)' % (title, up)
    if p_title:
        title = '%s - %s' % (title, p_title)
    return self.video_from_vid(vid, **kwargs)
def get_kankan_mparam(gcid, param):
    """Resolve a kankan gcid through the CDN API and return the requested
    field ('url', 'param1' or 'param2'); unknown selectors return the URL."""
    resolver = 'http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'.format(gcid)
    info = get_content(resolver)
    host = r1(r'ip:"(.*?)"', info)
    path = r1(r'path:"(.*?)"', info)
    full_url = 'http://' + host + '/' + path
    if param == 'param1':
        return r1(r'param1:(.*),', info)
    if param == 'param2':
        return r1(r'param2:(.*)}', info)
    return full_url
def video_from_url(self, url, **kwargs):
    """Extract the PPTV webcfg id from a show page and delegate to
    video_from_vid."""
    assert re.match(r'http://v.pptv.com/show/(\w+)\.html', url)
    page = get_html(url)
    vid = r1(r'webcfg\s*=\s*{"id":\s*(\d+)', page)
    assert vid
    return self.video_from_vid(vid, **kwargs)
def getVideoByVid(vid):
    """Resolve a QQ vid to a playable URL via the geturl XML API; returns
    None when the API response could not be fetched."""
    api = 'http://vv.video.qq.com/geturl?otype=xml&platform=1&vid=%s&format=2' % vid
    xml = getHtml(api)
    if not xml:
        return None
    return r1(r'<url>(.*?)</url>', xml)
def getVideoByUrl(url):
    """Resolve a Sohu page to a list of real clip URLs.

    Reads the vid from the page, queries vrs_flash for every stream type in
    streamTypes, and for each resolves clip keys via getPram/getKey.  Returns
    the first successfully assembled clip list, an empty list when every
    stream type failed, or None when a fetch failed.
    """
    html = getHtml(url)
    if not html:
        return
    vid = r1(r'share.vrs.sohu.com/(.*?)/', html)
    streamTypes = ["norVid", "highVid", "superVid", "oriVid"]
    streamType_url = "http://hot.vrs.sohu.com/vrs_flash.action?vid=" + vid
    content = getHtml(streamType_url)
    if not content:
        return
    jsonContent = json.loads(content)['data']
    videos = []
    for streamType in streamTypes:
        streamType_id = jsonContent[streamType]
        try:
            allot, prot, clipsURL, su = getPram(streamType_id)
            # clip list and key list must pair up one-to-one
            if(len(clipsURL)!=len(su)):
                continue
            else:
                video = []
                for i in range(len(clipsURL)):
                    allot_url = "http://%s/?prot=%s&file=%s&new=%s"%(allot, prot, clipsURL[i], su[i])
                    prefix, key = getKey(allot_url)
                    realUrl = "%s%s?key=%s"%(prefix[0:-1], su[i], key)
                    video.append(realUrl)
                if video:
                    videos.append(video)
        except:
            # NOTE(review): bare except deliberately treats any stream-type
            # failure as "try the next type" -- it also hides real bugs
            pass
    if len(videos)>0:
        return videos[0]
    return videos
def get_m1905_urls(vid):
    """Collect rate/URL entries for a 1905.com vid: the iPad m3u8 stream plus
    the links advertised in the vod profile XML."""
    urls = []
    m3u8url = get_m1905_m3u8(vid)
    if m3u8url is not None:
        urls.append({"rate": "标清", "furls": [m3u8url]})
    fir = r1(r"(\d).*", vid)
    sec = r1(r"\d(\d).*", vid)
    xml = get_content("http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml".format(fir, sec, vid))
    attrs = ET.fromstring(xml).find("playlist/item").attrib
    for key in attrs:
        if key in ["url", "sdurl", "bkurl", "hdurl"]:
            urls.append({"rate": get_clarity(key), "furls": [attrs[key]]})
    return urls
def get_m1905_urls(vid):
    """Gather playable rate/URL pairs for a 1905.com vid from the iPad m3u8
    endpoint and the static vod profile XML."""
    results = []
    ipad_stream = get_m1905_m3u8(vid)
    if ipad_stream is not None:
        results.append({'rate': '标清', 'furls': [ipad_stream]})
    first_digit = r1(r'(\d).*', vid)
    second_digit = r1(r'\d(\d).*', vid)
    profile = get_content('http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml'.format(first_digit, second_digit, vid))
    item = ET.fromstring(profile).find('playlist/item')
    for attr, link in item.attrib.items():
        if attr in ['url', 'sdurl', 'bkurl', 'hdurl']:
            results.append({'rate': get_clarity(attr), 'furls': [link]})
    return results
def getVideoByUrl(url):
    """Pull the vid query parameter off a v.qq.com URL and resolve it to a
    playable URL via getVideoByVid."""
    # url is like: http://v.qq.com/news/?tag=hot&vid=a00153364t6
    vid = r1(r'.*?vid=(.*)', url)
    return getVideoByVid(vid)
def get_vid(url):
    """Return the Sohu vid for a page: from its share.vrs link, falling back
    to the inline ``vid="..."`` assignment.

    Fix: the bare ``except`` swallowed every exception (including
    KeyboardInterrupt); only AttributeError -- raised by ``match.group`` when
    ``search`` returned None -- is the expected miss here.
    """
    html = get_content(url)
    try:
        pattern = re.compile("share.vrs.sohu.com/(.*?)/")
        match = pattern.search(html)
        vid = match.group(1)
    except AttributeError:
        vid = r1(r'vid="(.*)";', html)
    return vid
def get_vid(url):
    """Return the Sohu vid: from the share.vrs.sohu.com link in the page, or
    from the inline ``vid="..."`` assignment when that link is absent.

    Fix: narrowed the bare ``except`` to AttributeError, the only expected
    failure (``.group`` on the None returned by a missed ``search``).
    """
    html = get_content(url)
    try:
        pattern = re.compile("share.vrs.sohu.com/(.*?)/")
        match = pattern.search(html)
        vid = match.group(1)
    except AttributeError:
        vid = r1(r'vid="(.*)";', html)
    return vid
def vid_from_url(self, url, **kwargs):
    """Return the CNTV/CCTV video id for ``url``, or None when the URL shape
    is not recognised.

    The id comes from the URL itself for /video/ and xiyou links, and from
    the fetched page (guid / videoCenterId) for the article-style URLs.
    """
    html = get_html(url)
    if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
        id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
    elif re.match(r'http://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url):
        id = r1(r'var guid = "(\w+)"', html)
    elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
            re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
            re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
            re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
            re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
        # article-style pages embed the id as videoCenterId
        id = r1(r'videoCenterId","(\w+)"', html)
    elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
        id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
    else:
        return None
    return id
def get_m1905_m3u8(vid): try: url = 'http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad'.format(vid) con = get_content(url) m3url = r1(r'"iosurl":"(.*?)",',con) m3u8 = base64.decodestring(m3url) except Exception,e: print e m3u8 = None
def get_m1905_m3u8(vid): try: url = "http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad".format(vid) con = get_content(url) m3url = r1(r'"iosurl":"(.*?)",', con) m3u8 = base64.decodestring(m3url) except Exception, e: print e m3u8 = None
def getVideoByUrl(url):
    """Resolve a Sina video page via the iPad player wrapper and scrape the
    <source src> out of it; if the wrapper page cannot be fetched the input
    URL is returned unchanged."""
    pad_player = r'http://dp.sina.cn/dpool/video/pad/play.php?url='
    content = getHtml(pad_player + url)
    if content:
        url = r1(r'<source.*?src="(.*?)"', content)
    return url
def get_pps_urls_by_id(vid):
    """Query the PPS CDN endpoint for flash types 0 and 1 and collect a
    rate/furls entry for each response that carries a 'pfv' link."""
    urls = []
    for kind in (0, 1):
        con = get_content('http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'.format(vid, kind))
        if 'pfv' in con:
            urls.append({
                'rate': get_pps_rate(str(kind)),
                'furls': [r1(r'(.*)&all.*', con)],
            })
    return urls
def getVideoByUrl(url):
    """Find a direct <video src> on the page; otherwise resolve an embedded
    ku6 player through its data-vid.  Returns None on fetch failure or when
    no playable source is found."""
    content = getHtml(url)
    if not content:
        return None
    videoUrl = r1(r"<video.*?src='(.*?)'", content)
    if videoUrl:
        return videoUrl
    sourceWeb = r1(r'src="(.*?)" data-vid', content)
    dataVid = r1(r'data-vid="(.*?)"', content)
    if 'ku6' in sourceWeb and dataVid:
        return getKu6VideoByVid(dataVid)
    return None
def getVideoInfoByUrl(url):
    """Build the kankanews vxml URL from the article URL and read the video
    URL out of the XML (base + resolution path).

    Returns None when the XML could not be fetched.
    """
    # article: http://domestic.kankanews.com/c/2014-08-04/0015274473.shtml
    # xml:     http://www.kankanews.com/vxml/2014-08-04/0015274473.xml
    stem = r1(r'(/\d{4}-\d{2}-\d{2}/\w*?)\.', url)
    content = getHtml(r'http://www.kankanews.com/vxml%s.xml' % stem)
    if not content:
        return None
    root = ET.fromstring(content)
    # just a patch (original comment): substitute the 450k variant for 1500k
    resolution = root[0].text.replace('h264_1500k_mp4', 'h264_450k_mp4')
    return root[1].text + resolution
def getVideoByUrl(url):
    """Fetch a gzip-served page and scrape the videoUrl= value out of its
    <param> tag; returns None on fetch failure or no match."""
    page = getGzipHtml(url)
    if not page:
        return None
    return r1(r'<param.*?videoUrl=(.*?)"', page)
def get_pps_urls_by_id(vid):
    """Collect rate/furls entries from the PPS CDN for a vid, probing flash
    type 0 then 1 and keeping only responses that contain 'pfv'."""
    entries = []
    api = 'http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'
    for flash_type in range(0, 2):
        reply = get_content(api.format(vid, flash_type))
        if 'pfv' not in reply:
            continue
        trimmed = r1(r'(.*)&all.*', reply)
        entries.append({'rate': get_pps_rate(str(flash_type)), 'furls': [trimmed]})
    return entries
def get_iqiyi_urls(url): allurls = [] threads = [] gen_uid = uuid4().hex html = get_content(url) tvid = r1(r'data-player-tvid="([^"]+)"', html) videoid = r1(r'data-player-videoid="([^"]+)"', html) assert tvid assert videoid info = getVMS(tvid,videoid,gen_uid) bids = [] videos = [] try: for i in info["data"]["vp"]["tkl"][0]["vs"]: bid=int(i["bid"]) bids.append(bid) video_links=i["fs"] videos.append(video_links) for i in range(0,len(videos)): thread1 = getUrls(i,[videos[i], gen_uid, info,str(bids[i])]) threads.append(thread1) thread1.start() except Exception,e: print e
def get_iqiyi_urls(url): allurls = [] threads = [] gen_uid = uuid4().hex html = get_content(url) tvid = r1(r'data-player-tvid="([^"]+)"', html) videoid = r1(r'data-player-videoid="([^"]+)"', html) assert tvid assert videoid info = getVMS(tvid, videoid, gen_uid) bids = [] videos = [] try: for i in info["data"]["vp"]["tkl"][0]["vs"]: bid = int(i["bid"]) bids.append(bid) video_links = i["fs"] videos.append(video_links) for i in range(0, len(videos)): thread1 = getUrls(i, [videos[i], gen_uid, info, str(bids[i])]) threads.append(thread1) thread1.start() except Exception, e: print e
def seriesList(url):
    """List all episodes of a fun.tv album (g-<id>), then append its playlist
    and related items.

    Shows a dialog and returns [] when the vod_panel API answers 404.
    Fix: the epid pattern used ``http?`` (optional final "p"), which never
    matched https URLs; ``https?`` matches both schemes.
    """
    epid = r1(r'https?://www.fun.tv/vplay/.*g-(\w+)', url)
    purl = 'http://api.funshion.com/ajax/vod_panel/%s/w-1?isajax=1'
    link = get_html(purl % epid)
    intro = loads(get_html(profile_m.format(epid)))
    poster = intro['poster'].encode('utf-8')  # NOTE(review): unused; kept to preserve the encode check
    json_response = loads(link)
    if json_response['status'] == 404:
        xbmcgui.Dialog().ok(plugin.addon.getAddonInfo('name'), '本片暂不支持网页播放')
        return []
    items = []
    videos = json_response['data']['videos']
    for item in videos:
        p_name = item['name'].encode('utf-8')
        p_url = httphead(item['url'].encode('utf-8'))
        p_thumb = item['pic'].encode('utf-8')
        seconds = item['duration']
        # mark prevues (trailers) in the label
        if item['dtype'] == 'prevue':
            extra = EXTRA % '|预'
        else:
            extra = ''
        items.append({
            'label': p_name + extra,
            'path': url_for('playvideo', url=p_url),
            'thumbnail': p_thumb,
            'is_playable': True,
            'info': {'title': p_name, 'duration': seconds,
                     'plot': intro['description']}
        })
    # playlist
    items += playList(url)
    # related
    items += relatedList(url)
    return items
def video_from_vid(self, vid, **kwargs):
    """Fetch CNTV HTTP video info for ``vid`` and return the chapter URLs.

    ``kwargs['level']`` picks among the '...hapters...' quality alternatives
    the API returned, clamped to the available range.
    Fix: removed the unused ``res`` list (a leftover ordering table that was
    never read).
    """
    api = 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid='
    html = get_html(api + vid)
    info = loads(html)
    title = info['title']  # kept: validates the response carries a title
    video = info['video']
    # every key containing 'hapters' is one quality alternative
    alternatives = [x for x in video.keys() if 'hapters' in x]
    level = kwargs.get('level', 0)
    level = min(level, len(alternatives) - 1)
    chapters = video[alternatives[level]]
    urls = [x['url'] for x in chapters]
    ext = r1(r'\.([^.]+)$', urls[0])
    assert ext in ('flv', 'mp4')
    return urls
def getVideoByUrl(url):
    """Map an ifeng article URL to its video_info XML and return the
    VideoPlayUrl attribute, or None when the XML cannot be fetched."""
    # article: http://v.ifeng.com/news/world/201408/<guid>.shtml
    # info:    http://v.ifeng.com/video_info_new/<c>/<cc>/<guid>.xml
    guid = r1(r".*/(.*?)\.", url)
    shard = guid[len(guid) - 2]
    pair = guid[len(guid) - 2:len(guid)]
    info_url = "http://v.ifeng.com/video_info_new/" + shard + r"/" + pair + r"/" + guid + r".xml"
    content = getHtml(info_url)
    if not content:
        return None
    root = ET.fromstring(content)
    return root[0].attrib.get("VideoPlayUrl")
def get_urls(url): urls = [] for i in range(0,3): template = {} flvcdurl = "http://www.flvcd.com/parse.php?format={}&kw={}".format(form[i], quote(url)) content = get_content(flvcdurl) furls = r2('<BR><a href=\"(.*?)\" target=',content) if furls!= []: print 'flvcd multi' template['rate'] = rate[i] template['furls'] = furls urls.append(template) else: print 'flvcd single' sfurls = r1('<br>.*?<a href=\"(.*?)\" target',content) print 'sfurls',sfurls if(sfurls!=None): template['rate'] = rate[i] template['furls'] = [sfurls] urls.append(template) return parser2dic(urls)
def crawl_img_urls(url, page=25, face=None):
    """Crawl Baidu image-search result URLs for a query image and save them.

    Each result page yields 60 image URLs; ``page`` pages are fetched in
    parallel with a multiprocessing Pool (face search when ``face`` is
    truthy, similar-image search otherwise).  Results are deduplicated,
    written via savetoImgUrls, and the url file printed.  Returns None.
    """
    # return 60 image urls every page
    # url is like 'http://image.baidu.com/n/pc_search?queryImageUrl=http%3A%2F%2Fb.hiphotos.baidu.com%2Fimage%2Fpic%2Fitem%2F0eb30f2442a7d9334945b2c7a84bd11372f00190.jpg&querySign=408832356%2C3878106824&fm=result&pos=upload'
    # `pattern` is a module-level regex -- presumably extracts queryImageUrl; confirm
    query_url = r1(pattern, url)
    if face:
        search_url = 'http://image.baidu.com/n/face?queryImageUrl=%s&rn=60&pn=' % query_url
    else:
        search_url = 'http://image.baidu.com/n/similar?queryImageUrl=%s&rn=60&pn=' % query_url
    search_urls = [search_url + str(i * 60) for i in range(page)]
    pool = Pool()  # default core number
    if face:
        results = pool.map(extract_imageurl_face, search_urls)
    else:
        results = pool.map(extract_imageurl, search_urls)
    pool.close()
    pool.join()
    url_list = []
    for result in results:
        url_list += result
    url_set = set(url_list)  # deduplicate across pages
    savetoImgUrls(url_set)
    printFile(img_url_file)
def get_hdvid(html):
    """Scrape the hd_vid field out of a player page."""
    return r1(r"hd_vid:\'(.*?)\',", html)
def get_ipadvid(html):
    """Scrape the ipad_vid field out of a player page."""
    return r1(r"ipad_vid:\'(.*?)\',", html)
def get_suffix_by_html(html):
    """Fetch the page and assemble the '/url/<playUrl>/key/<playKey>' path
    suffix from its _playUrl and _playKey fields."""
    page = get_content(html)
    parts = [
        r1(r"_playUrl = \'(.*?)\',", page),
        r1(r"_playKey = \'(.*?)\',", page),
    ]
    return "/url/" + parts[0] + "/key/" + parts[1]
def get_wasu_id(url):
    """Return the wasu.cn play id from a /Play/show/id/ URL, or None for any
    other URL.

    Fix: ``vid`` was only bound inside the ``if`` branch, so a non-matching
    URL left the result undefined; the no-match case now returns None
    explicitly.
    """
    pattern = r"http://www.wasu.cn/Play/show/id/(.*)"
    if re.match(pattern, url):
        return r1(pattern, url)
    return None
def get_suffix_by_html(html):
    """Build the '/url/<playUrl>/key/<playKey>' suffix from the fetched page's
    _playUrl and _playKey fields."""
    page = get_content(html)
    play_url = r1(r'_playUrl = \'(.*?)\',', page)
    play_key = r1(r'_playKey = \'(.*?)\',', page)
    suffix = '/url/' + play_url + '/key/' + play_key
    return suffix
def get_newsvid(url):
    """Return the fragment after '#' in the URL (the news vid)."""
    return r1(r".*#(.*)", url)
def get_wasu_id(url):
    """Return the wasu.cn play id (the tail of a /Play/show/id/ URL), or None
    when the URL is not a wasu play URL.

    Fix: ``vid`` was assigned only when the pattern matched, leaving the
    result undefined otherwise; the miss path now returns None explicitly.
    """
    pattern = r'http://www.wasu.cn/Play/show/id/(.*)'
    if re.match(pattern, url):
        return r1(pattern, url)
    return None
def get_gcid(html):
    """Fetch the page and pull the gcid segment out of its
    pubnet.sandai.net mp4 link."""
    page = get_content(html)
    return r1(r'http://pubnet.sandai.net:8080/\d+/(.*?)/.*?.mp4', page)
def singeralbum(url, id, page):
    """Yield Kodi list items for a Kuwo singer page: albums, MVs, and a
    paginated song list with prev/next navigation entries.

    ``url`` is the singer page path, ``id`` the artist id for the Ajax song
    API, ``page`` the (string or int) song-list page number.
    """
    plugin.set_content('music')
    SINGER = 'http://www.kuwo.cn'
    page = int(page)
    yield {'label': BANNER_FMT % u'专辑', 'path': url_for('stay')}
    html = get_html(SINGER + url.replace(' ', '%20'))  # some singer name has ' '
    tree = BeautifulSoup(html, "html.parser")
    # ALBUM #######################################
    soup = tree.find_all('div', {'id': 'album'})
    li = soup[0].find_all('li')
    for album in li:
        name = album.find('span', {'class': 'name'})
        thumb = album.find('div', {'class': 'cover'})
        image = thumb.img['src']
        yield {
            'label': name.text.strip(),
            'path': url_for('musiclist', url=name.a['href']),
            'thumbnail': str(image)  # why is unicode not string??
        }
    # MV ###############################################
    soup = tree.find_all('div', {'id': 'mv'})
    li = soup[0].find_all('li')
    yield {'label': BANNER_FMT % u'MV', 'path': url_for('stay')}
    for mv in li:
        name = mv.find('span', {'class': 'name'})
        mid = r1('\/mv\/(\d+)?', name.a['href'])
        image = mv.find('div', {'class': 'cover'})
        image = image.img['src']
        yield {
            'label': name.text.strip(),
            'path': url_for('playmv', mid=mid),
            'is_playable': True,
            'thumbnail': str(image),
            'info': {
                'title': name.text.strip()
            }
        }
    # SONGS ###############################################
    yield {'label': BANNER_FMT % u'单曲', 'path': url_for('stay')}
    aurl = 'http://www.kuwo.cn/artist/contentMusicsAjax'
    aurl += '?artistId=%s&pn=%d&rn=15' % (id, page)
    html = get_html(aurl)
    # NOTE(review): `l` is collected but never used below -- confirm it can go
    l = re.compile('"id":"MUSIC_(\d+)').findall(html)
    maxpage = re.compile('data-page="(\d+)"').findall(html)
    maxpage = int(maxpage[0])
    tree = BeautifulSoup(html, 'html.parser')
    soup = tree.find_all('li', {'class': 'onLine'})
    if page > 0:
        # previous-page navigation entry
        yield {
            'label': BANNER_FMT % u'上一页',
            'path': url_for('singeralbum', url=url, id=id, page=page - 1)
        }
    for song in soup:
        mid = re.compile('\d+').findall(song.a['href'])
        mid = mid[0]
        # NOTE(review): this fetch's result is discarded -- looks like a
        # pre-warm of musicAPI; confirm it is needed
        html = get_html(musicAPI + mid)
        yield {
            'label': song.a.text,
            'path': url_for('playmusic', mid=mid),
            'is_playable': True,
            'info': {
                'title': song.a.text
            }
        }
    if page < maxpage:
        # next-page navigation entry
        yield {
            'label': BANNER_FMT % u'下一页',
            'path': url_for('singeralbum', url=url, id=id, page=page + 1)
        }
def get_gcid(html):
    """Extract the gcid from the pubnet.sandai.net mp4 link on the fetched
    page."""
    content = get_content(html)
    gcid = r1(r'http://pubnet.sandai.net:8080/\d+/(.*?)/.*?.mp4', content)
    return gcid