Example #1
0
 def getCommentIqiyi(cls, playlink, moviename):
     """
     iqiyi: collect the comments of a play page and store them with db.insert.

     The play page is fetched, the value of its ``qitanid="..."`` attribute is
     extracted, and comment API pages 1..49 are requested for that id. Every
     ``content":"..."`` match is passed through ``pyunescape.pyunescape`` and,
     when the result is neither None nor empty, inserted as
     ``(moviename, 'iqiyi', comment)``.

     :param playlink: URL of the play page; empty or None means nothing to do.
     :param moviename: movie name stored next to every comment.
     :return: None
     """
     if not playlink:
         return
     source = Request.getSource(playlink)
     if source is None:
         return
     # Capture the id directly; a page without it has no comments to fetch.
     m = re.search(r'qitanid="(\w+)"', source)
     if m is None:
         return
     vid = m.group(1)
     # Loop-invariant, so compiled once instead of once per page.
     pat = re.compile(r'content":"(.*?)"')
     for page in range(1, 50):
         jslink = 'http://api.t.iqiyi.com/qx_api/comment/get_video_comments?page='+str(page)+'&qitanid='+str(vid)
         commentsource = Request.getSource(jslink)
         if commentsource is None:
             continue
         try:
             for comm in pat.findall(commentsource):
                 unescomm = pyunescape.pyunescape(comm)
                 if unescomm is not None and unescomm != "":
                     db.insert((moviename, 'iqiyi', unescomm))
         except Exception:
             # Best effort: a failing page must not stop the remaining pages.
             continue
Example #2
0
 def resolveSohu(cls, movielink, moviename):
     """
     sohu: extract the play page URL of the first search result from the
     movie search page and pass it to Comments.Comments.getCommentSohu.

     Best effort: when the page cannot be fetched, contains no matching
     link, or anything else fails, the method returns silently.

     :param movielink: URL of the search result page.
     :param moviename: movie name forwarded to the comment collector.
     :return: None
     """
     try:
         f = Request.getSource(movielink)
         if f is None:
             return
         source = BeautifulSoup(f)
         links = source.find_all('a', title='点击观看')
         if not links:
             # No play link in the search result.
             return
         playurl = links[0].get('href')
         if not playurl:
             return
         Comments.Comments.getCommentSohu(playurl, moviename)
     except Exception:
         # Deliberately silent, but no longer swallows KeyboardInterrupt
         # or SystemExit the way a bare except did.
         pass
Example #3
0
 def getCommentSohu(cls, playlink, moviename):
     """
     sohu: collect the comments of a play page and store them with db.insert.

     The play page is fetched and the quoted values assigned to ``vid`` and
     ``playlistId`` are extracted from it. Both are used to build the URL of
     the comment list script. Every ``content":"..."`` match inside the
     bracketed part of that script is passed through
     ``pyunescape.pyunescape`` and, when the result is neither None nor
     empty, inserted as ``(moviename, 'sohu', comment)``.

     :param playlink: URL of the play page; empty or None means nothing to do.
     :param moviename: movie name stored next to every comment.
     :return: None
     """
     if not playlink:
         return
     source = Request.getSource(playlink)
     if source is None:
         return
     # The patterns accept single or double quotes, so the value is taken
     # from a capture group; splitting on '"' failed for single quotes.
     m = re.search(r'vid\s*=\s*[\"\'](\w+)[\"\']', source)
     n = re.search(r'playlistId\s*=\s*[\"\'](\w+)[\"\']', source)
     if m is None or n is None:
         # Without both ids the comment URL cannot be built.
         return
     vid = m.group(1)
     playlistid = n.group(1)
     jslink = 'http://access.tv.sohu.com/reply/list/1000_' + \
         str(playlistid) + '_' + str(vid) + '_0_2000.js'
     commentsource = Request.getSource(jslink)
     if commentsource is None:
         return
     try:
         comments = re.search(r'\[.*\]', commentsource)
         if comments is None:
             return
         # Normalise quotes so the content pattern below can match.
         comments = comments.group(0).replace("'", '"')
         pat = re.compile(r'content":"(.*?)"')
         for comm in pat.findall(comments):
             unescomm = pyunescape.pyunescape(comm)
             if unescomm is not None and unescomm != "":
                 db.insert((moviename, 'sohu', unescomm))
     except Exception:
         # Best effort: failures while storing comments are ignored.
         pass
Example #4
0
 def resolveFunshion(cls, movielink):
     """
     funshion: extract the play page URL of the first search result from the
     movie search page and print it.

     The href of the first ``a`` inside the first ``ul`` found by
     ``find_all('ul', "search_list")`` is appended to the funshion host.
     Nothing is printed when no such list exists; a failure message is
     printed when extraction raises.

     :param movielink: URL of the search result page.
     :return: None
     """
     try:
         f = Request.getSource(movielink)
         source = BeautifulSoup(f)
         links = source.find_all('ul', "search_list")
         if links:
             playurl = 'http://www.funshion.com' + \
                 links[0].find('a').get('href')
             # Single-argument print() behaves the same on Python 2 and 3.
             print(playurl)
     except Exception:
         # Narrower than a bare except: interrupts are no longer swallowed.
         print('从搜索结果提取播放地址失败')
Example #5
0
 def resolveTudou(cls, movielink):
     """
     tudou: extract the play page URL of the first search result from the
     movie search page and print it.

     Original author's note: like youku, the data comes from soku, so this
     site can be ignored.

     Nothing is printed when no ``div`` matching "btnplay_s" exists; a
     failure message is printed when extraction raises.

     :param movielink: URL of the search result page.
     :return: None
     """
     try:
         f = Request.getSource(movielink)
         source = BeautifulSoup(f)
         links = source.find_all('div', "btnplay_s")
         if links:
             playurl = links[0].find('a').get('href')
             # NOTE: unlike resolveYouku, links starting with '/search'
             # are not filtered out here.
             print(playurl)
     except Exception:
         # Narrower than a bare except: interrupts are no longer swallowed.
         print('从搜索结果提取播放地址失败')
Example #6
0
 def resolveYouku(cls, movielink):
     """
     youku: extract the play page URL of the first search result from the
     movie search page and print it.

     Original author's note: links that redirect to other video sites and
     links that do not exist are dropped. In the code this is the check
     that skips any href starting with '/search'.

     Nothing is printed when no ``div`` matching "btnplay_s" exists; a
     failure message is printed when extraction raises.

     :param movielink: URL of the search result page.
     :return: None
     """
     try:
         f = Request.getSource(movielink)
         source = BeautifulSoup(f)
         links = source.find_all('div', "btnplay_s")
         if links:
             playurl = links[0].find('a').get('href')
             if not playurl.startswith('/search'):
                 print(playurl)
     except Exception:
         # Narrower than a bare except: interrupts are no longer swallowed.
         print('从搜索结果提取播放地址失败')
Example #7
0
 def getCommentLetv(cls, playlink, moviename):
     """
     letv: collect the comments of a play page and store them with db.insert.

     The video id is the last path segment of ``playlink`` up to its first
     dot. Comment API pages 1..119 are requested for that id. Every
     ``content":"..."`` match is passed through ``pyunescape.pyunescape``
     and, when the result is neither None nor empty, inserted as
     ``(moviename, 'letv', comment)``.

     :param playlink: URL of the play page; empty or None means nothing to do.
     :param moviename: movie name stored next to every comment.
     :return: None
     """
     if not playlink:
         return
     vid = playlink.split('/')[-1].split('.')[0]
     # Loop-invariant, so compiled once instead of once per page.
     pat = re.compile(r'content":"(.*?)"')
     for i in range(1, 120):
         jslink = 'http://api.my.letv.com/vcm/api/g?type=video&xid='+str(vid)+'&page='+str(i)
         source = Request.getSource(jslink)
         if source is None:
             continue
         try:
             for comm in pat.findall(source):
                 unescomm = pyunescape.pyunescape(comm)
                 if unescomm is not None and unescomm != "":
                     db.insert((moviename, 'letv', unescomm))
         except Exception:
             # Best effort: a failing page must not stop the remaining pages.
             continue
Example #8
0
 def getCommentFunshion(cls, playlink, moviename):
     """
     funshion: collect the comments of a play page and store them with
     db.insert.

     The media id is the second-to-last '/'-separated segment of
     ``playlink``. Comment pages 1..30 are requested for that id. Every
     ``content":"..."`` match is passed through ``pyunescape.pyunescape``
     and, when the result is neither None nor empty, inserted as
     ``(moviename, 'funshion', comment)``.

     :param playlink: URL of the play page; empty or None means nothing to do.
     :param moviename: movie name stored next to every comment.
     :return: None
     """
     if not playlink:
         return
     segments = playlink.split('/')
     if len(segments) < 2:
         # No '/' in the link, so there is no media id segment to use.
         return
     vid = segments[-2]
     # Loop-invariant, so compiled once instead of once per page.
     pat = re.compile(r'content":"(.*?)"')
     for page in range(1, 31):
         jslink = 'http://q.funshion.com/ajax/get_comment/media/'+vid+'/all?pg='+str(page)
         source = Request.getSource(jslink)
         if source is None:
             continue
         try:
             for comm in pat.findall(source):
                 unescomm = pyunescape.pyunescape(comm)
                 if unescomm is not None and unescomm != "":
                     db.insert((moviename, 'funshion', unescomm))
         except Exception:
             # Best effort: a failing page must not stop the remaining pages.
             continue
Example #9
0
 def getCommentYouku(cls, playlink, moviename):
     """
     youku: collect the comments of a play page and store them with db.insert.

     The video id is the part of ``playlink`` after its last '_' and before
     the following dot. Comment pages 1..99 are requested for that id. Each
     ``content_...<br`` match is split on '">' and its third piece is passed
     through ``pyunescape.pyunescape``; when the result is neither None nor
     empty it is inserted as ``(moviename, 'youku', comment)``.

     :param playlink: URL of the play page; empty or None means nothing to do.
     :param moviename: movie name stored next to every comment.
     :return: None
     """
     if not playlink:
         return
     vid = playlink.split('_')[-1].split('.')[0]
     # Loop-invariant, so compiled once instead of once per page.
     pat = re.compile(r'content_(.*?)<br')
     for page in range(1, 100):
         jslink = 'http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap={%22videoid%22:%22'+vid+'%22,'+'%22page%22:'+str(page)+'}'
         commentsource = Request.getSource(jslink)
         if commentsource is None:
             continue
         try:
             for comm in pat.findall(commentsource):
                 pieces = comm.split('">')
                 if len(pieces) < 3:
                     # Skip only this malformed match; previously the
                     # IndexError dropped the rest of the page.
                     continue
                 unescomm = pyunescape.pyunescape(pieces[2])
                 if unescomm is not None and unescomm != "":
                     db.insert((moviename, 'youku', unescomm))
         except Exception:
             # Best effort: a failing page must not stop the remaining pages.
             continue