def getCommentIqiyi(cls, playlink, moviename):
    """Collect iQiyi comments for a play page and store them via ``db``.

    The play page is fetched with ``Request.getSource`` and searched for a
    ``qitanid="..."`` attribute. That id is used to query pages 1 through 49
    of the iQiyi comment API. Every non-empty unescaped comment is stored as
    ``(moviename, 'iqiyi', comment)``.

    Args:
        cls: Not used by the body (presumably the class when this is bound
            as a classmethod -- the decorator is not visible here).
        playlink: URL of the play page; an empty string makes this a no-op.
        moviename: Movie name stored alongside every comment.

    Returns:
        None.
    """
    if playlink == '':
        return
    source = Request.getSource(playlink)
    if source is None:
        return

    match = re.search(r'qitanid="(\w+)"', source)
    if match is None:
        # No comment id on the page: nothing to fetch. Dereferencing the
        # failed match used to raise AttributeError here.
        return
    vid = match.group(1)

    # Compiled once; the pattern does not change between pages.
    pat = re.compile(r'content":"(.*?)"')
    for page in range(1, 50):
        jslink = ('http://api.t.iqiyi.com/qx_api/comment/get_video_comments?page='
                  + str(page) + '&qitanid=' + str(vid))
        commentsource = Request.getSource(jslink)
        if commentsource is None:
            continue
        try:
            for comm in pat.findall(commentsource):
                unescomm = pyunescape.pyunescape(comm)
                if unescomm is not None and unescomm != "":
                    db.insert((moviename, 'iqiyi', unescomm))
        except Exception:
            # Best effort: a failing page is skipped. Unlike a bare except,
            # this lets KeyboardInterrupt/SystemExit stop the crawl.
            continue
def resolveSohu(cls, movielink, moviename):
    """Sohu: extract the play-page URL from a search page and fetch comments.

    The search page is fetched with ``Request.getSource`` and parsed with
    ``BeautifulSoup``. The ``href`` of the first ``<a>`` whose title is
    '点击观看' is passed to ``Comments.Comments.getCommentSohu`` together
    with ``moviename``.

    Args:
        cls: Not used by the body.
        movielink: URL of the Sohu search-result page.
        moviename: Movie name forwarded to the comment collector.

    Returns:
        None. Failures are swallowed; this is a best-effort step.
    """
    try:
        f = Request.getSource(movielink)
        source = BeautifulSoup(f)
        links = source.find_all('a', title='点击观看')
        if not links:
            # No playable result on the search page.
            return
        playurl = links[0].get('href')
        if not playurl:
            # Anchor without a usable href: nothing to crawl.
            return
        Comments.Comments.getCommentSohu(playurl, moviename)
    except Exception:
        # Deliberately silent, but no longer a bare except, so
        # KeyboardInterrupt/SystemExit can still stop the program.
        pass
def getCommentSohu(cls, playlink, moviename):
    """Collect Sohu comments for a play page and store them via ``db``.

    The play page is fetched with ``Request.getSource`` and searched for
    quoted ``vid = ...`` and ``playlistId = ...`` assignments. Both ids are
    used to build the URL of a single reply-list script. Every non-empty
    unescaped comment found in it is stored as
    ``(moviename, 'sohu', comment)``.

    Args:
        cls: Not used by the body.
        playlink: URL of the play page; an empty string makes this a no-op.
        moviename: Movie name stored alongside every comment.

    Returns:
        None.
    """
    if playlink == '':
        return
    source = Request.getSource(playlink)
    if source is None:
        return

    # Capture groups are used because the patterns accept either quote
    # style; the former split('"') raised IndexError on single quotes.
    vid_match = re.search(r'vid\s*=\s*[\"\'](\w+)[\"\']', source)
    list_match = re.search(r'playlistId\s*=\s*[\"\'](\w+)[\"\']', source)
    if vid_match is None or list_match is None:
        # The page does not expose the ids needed to build the comment URL.
        return
    vid = vid_match.group(1)
    playlistid = list_match.group(1)

    jslink = ('http://access.tv.sohu.com/reply/list/1000_'
              + str(playlistid) + '_' + str(vid) + '_0_2000.js')
    commentsource = Request.getSource(jslink)
    if commentsource is None:
        return
    try:
        payload = re.search(r'\[.*\]', commentsource)
        if payload is None:
            return
        # Single quotes are normalised so the content pattern can match.
        comments = payload.group(0).replace("'", '"')
        for comm in re.findall(r'content":"(.*?)"', comments):
            unescomm = pyunescape.pyunescape(comm)
            if unescomm is not None and unescomm != "":
                db.insert((moviename, 'sohu', unescomm))
    except Exception:
        # Best effort: swallow ordinary failures, but let
        # KeyboardInterrupt/SystemExit propagate.
        pass
def resolveFunshion(cls, movielink):
    """Funshion: extract the play-page URL from a movie search page.

    The search page is fetched with ``Request.getSource`` and parsed with
    ``BeautifulSoup``. If a ``<ul>`` with class ``search_list`` exists, the
    ``href`` of its first ``<a>`` is appended to 'http://www.funshion.com'
    and printed. Nothing is printed when no such list is found.

    Args:
        cls: Not used by the body.
        movielink: URL of the Funshion search-result page.

    Returns:
        None. On error a failure message is printed instead.
    """
    try:
        f = Request.getSource(movielink)
        source = BeautifulSoup(f)
        links = source.find_all('ul', "search_list")
        if links:
            playurl = ('http://www.funshion.com'
                       + links[0].find('a').get('href'))
            # Single-argument print() behaves the same on Python 2 and 3.
            print(playurl)
    except Exception:
        # Message text: "failed to extract the play URL from the search
        # results". KeyboardInterrupt/SystemExit are no longer swallowed.
        print('从搜索结果提取播放地址失败')
def resolveTudou(cls, movielink):
    """Tudou: extract the play-page URL from a movie search page.

    Per the original note, Tudou results come from the same search backend
    as Youku. Unlike ``resolveYouku``, no filtering of '/search' links is
    applied here: the ``href`` of the first ``<a>`` inside the first
    ``<div class="btnplay_s">`` is printed as-is.

    Args:
        cls: Not used by the body.
        movielink: URL of the Tudou search-result page.

    Returns:
        None. On error a failure message is printed instead.
    """
    try:
        f = Request.getSource(movielink)
        source = BeautifulSoup(f)
        links = source.find_all('div', "btnplay_s")
        if links:
            playurl = links[0].find('a').get('href')
            # Single-argument print() behaves the same on Python 2 and 3.
            print(playurl)
    except Exception:
        # Message text: "failed to extract the play URL from the search
        # results". KeyboardInterrupt/SystemExit are no longer swallowed.
        print('从搜索结果提取播放地址失败')
def resolveYouku(cls, movielink):
    """Youku: extract the play-page URL from a movie search page.

    The ``href`` of the first ``<a>`` inside the first
    ``<div class="btnplay_s">`` is printed, unless it starts with
    '/search'. Per the original note, this drops links that redirect to
    other video sites as well as links that do not exist.

    Args:
        cls: Not used by the body.
        movielink: URL of the Youku search-result page.

    Returns:
        None. On error a failure message is printed instead.
    """
    try:
        f = Request.getSource(movielink)
        source = BeautifulSoup(f)
        links = source.find_all('div', "btnplay_s")
        if links:
            playurl = links[0].find('a').get('href')
            if not playurl.startswith('/search'):
                # Single-argument print() is the same on Python 2 and 3.
                print(playurl)
    except Exception:
        # Message text: "failed to extract the play URL from the search
        # results". KeyboardInterrupt/SystemExit are no longer swallowed.
        print('从搜索结果提取播放地址失败')
def getCommentLetv(cls, playlink, moviename):
    """Collect Letv comments for a play page and store them via ``db``.

    The video id is the last path segment of ``playlink`` up to its first
    dot. Pages 1 through 119 of the Letv comment API are fetched with
    ``Request.getSource``. Every non-empty unescaped comment is stored as
    ``(moviename, 'letv', comment)``.

    Args:
        cls: Not used by the body.
        playlink: URL of the play page; an empty string makes this a no-op.
        moviename: Movie name stored alongside every comment.

    Returns:
        None.
    """
    if playlink == '':
        return
    vid = playlink.split('/')[-1].split('.')[0]

    # Compiled once; the pattern does not change between pages.
    pat = re.compile(r'content":"(.*?)"')
    for i in range(1, 120):
        jslink = ('http://api.my.letv.com/vcm/api/g?type=video&xid='
                  + str(vid) + '&page=' + str(i))
        source = Request.getSource(jslink)
        if source is None:
            continue
        try:
            for comm in pat.findall(source):
                unescomm = pyunescape.pyunescape(comm)
                if unescomm is not None and unescomm != "":
                    db.insert((moviename, 'letv', unescomm))
        except Exception:
            # Best effort: a failing page is skipped. Unlike a bare except,
            # this lets KeyboardInterrupt/SystemExit stop the crawl.
            continue
def getCommentFunshion(cls, playlink, moviename):
    """Collect Funshion comments for a play page and store them via ``db``.

    The media id is the second-to-last '/'-separated segment of
    ``playlink``. Pages 1 through 30 of the Funshion comment endpoint are
    fetched with ``Request.getSource``. Every non-empty unescaped comment
    is stored as ``(moviename, 'funshion', comment)``.

    Args:
        cls: Not used by the body.
        playlink: URL of the play page. An empty string, or a link without
            any '/' separator, makes this a no-op.
        moviename: Movie name stored alongside every comment.

    Returns:
        None.
    """
    if playlink == '':
        return
    segments = playlink.split('/')
    if len(segments) < 2:
        # A link without '/' has no second-to-last segment; indexing [-2]
        # used to raise an uncaught IndexError here.
        return
    vid = segments[-2]

    # Compiled once; the pattern does not change between pages.
    pat = re.compile(r'content":"(.*?)"')
    for page in range(1, 31):
        jslink = ('http://q.funshion.com/ajax/get_comment/media/'
                  + vid + '/all?pg=' + str(page))
        source = Request.getSource(jslink)
        if source is None:
            continue
        try:
            for comm in pat.findall(source):
                unescomm = pyunescape.pyunescape(comm)
                if unescomm is not None and unescomm != "":
                    db.insert((moviename, 'funshion', unescomm))
        except Exception:
            # Best effort: a failing page is skipped. Unlike a bare except,
            # this lets KeyboardInterrupt/SystemExit stop the crawl.
            continue
def getCommentYouku(cls, playlink, moviename):
    """Collect Youku comments for a play page and store them via ``db``.

    The video id is the part of ``playlink`` after its last '_' and before
    the following dot. Pages 1 through 99 of the Youku comment endpoint are
    fetched with ``Request.getSource``. Each ``content_...<br`` fragment is
    split on '">' and its third piece is taken as the comment text. Every
    non-empty unescaped comment is stored as
    ``(moviename, 'youku', comment)``.

    Args:
        cls: Not used by the body.
        playlink: URL of the play page; an empty string makes this a no-op.
        moviename: Movie name stored alongside every comment.

    Returns:
        None.
    """
    if playlink == '':
        return
    vid = playlink.split('_')[-1].split('.')[0]

    # Compiled once; the pattern does not change between pages.
    pat = re.compile(r'content_(.*?)<br')
    for page in range(1, 100):
        jslink = ('http://comments.youku.com/comments/~ajax/vpcommentContent.html'
                  '?__ap={%22videoid%22:%22' + vid + '%22,%22page%22:'
                  + str(page) + '}')
        commentsource = Request.getSource(jslink)
        if commentsource is None:
            continue
        try:
            for comm in pat.findall(commentsource):
                parts = comm.split('">')
                if len(parts) < 3:
                    # Malformed fragment: skip only this comment. The old
                    # IndexError dropped the rest of the page as well.
                    continue
                unescomm = pyunescape.pyunescape(parts[2])
                if unescomm is not None and unescomm != "":
                    db.insert((moviename, 'youku', unescomm))
        except Exception:
            # Best effort: a failing page is skipped. Unlike a bare except,
            # this lets KeyboardInterrupt/SystemExit stop the crawl.
            continue