예제 #1
0
 def view_capture(self, song_id, page=1):
     """Fetch one page of comments for a song and store the popular ones.

     When ``page`` is 1, the comments already stored for ``song_id`` are
     deleted first and the response's ``hotComments`` are stored as well.
     Regular comments are stored only if they have more than 30 likes.
     Afterwards the song's ``Music163`` row is updated with ``over='Y'``
     and the total comment count reported by the API.

     Args:
         song_id: ID of the song whose comments are fetched.
         page: Page number handed to ``createParams``.

     Returns:
         The reported comment total floor-divided by 20 (presumably the
         page size -- confirm against the API).

     Raises:
         Exception: Any request, parsing or database error. The session is
             rolled back and the failure is logged before re-raising.
     """
     if page == 1:
         # Starting over for this song: drop whatever was stored before.
         self.session.query(pysql.Comment163).filter(
             pysql.Comment163.song_id == song_id).delete()
         self.session.commit()
     data = {
         'params': self.createParams(page),
         'encSecKey': self.__encSecKey
     }
     url = default.comment_url.format(str(song_id))
     try:
         req = requests.post(url,
                             headers=self.__headers,
                             data=data,
                             timeout=10)
         # Decode the response body once instead of once per use.
         payload = req.json()
         selected = [
             comment for comment in payload['comments']
             if comment['likedCount'] > 30
         ]
         if page == 1:
             # Hot comments are stored regardless of their like count.
             selected.extend(payload['hotComments'])
         for comment in selected:
             self.session.add(
                 pysql.Comment163(
                     song_id=song_id,
                     txt=comment['content'].encode('utf-8'),
                     author=comment['user']['nickname'].encode('utf-8'),
                     liked=comment['likedCount']))
             self.session.flush()
         cnt = int(payload['total'])
         self.session.query(pysql.Music163).filter(
             pysql.Music163.song_id == song_id).update({
                 'over': 'Y',
                 'comment': cnt
             })
         self.session.commit()
         # Floor division keeps the result an int on Python 2 and 3 alike.
         return cnt // 20
     except KeyboardInterrupt:
         print("INFO : 解释器请求退出")
         pylog.Log("ERROR 107 : 解释器请求退出")
         exit()
     except Exception:
         self.session.rollback()
         # Log before re-raising; placed after the raise it never ran.
         pylog.Log("ERROR 910 : SONG_ID-" + str(song_id) + " PAGE-" +
                   str(page))
         raise
예제 #2
0
 def view_links(self, song_id):
     """Scrape a song page and queue the songs and playlists it links to.

     Every ``li.f-cb`` entry containing an ``a.s-fc1`` anchor is treated as
     a linked song, and every ``a.sname.f-fs1.s-fc0`` anchor as a linked
     playlist. An item is added to the session only when
     ``pysql.single`` reports true for it (presumably meaning it is not
     stored yet -- confirm against ``pysql``). Added rows are flushed but
     not committed here.

     Args:
         song_id: ID of the song whose page is scraped.

     Returns:
         None. On any ``Exception`` the session is rolled back and the
         failure is logged with the requested ``song_id``.
     """
     url = "http://music.163.com/song?id=" + str(song_id)
     data = {'id': str(song_id)}
     # NOTE(review): a login cookie is hard-coded in the source; consider
     # moving it to configuration.
     headers = {
         'Cookie':
         'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'
     }
     try:
         req = requests.get(url, headers=headers, data=data, timeout=100)
         sup = BeautifulSoup(req.content, "html.parser")
         for link in sup.find_all('li', class_="f-cb"):
             html = link.find('a', 's-fc1')
             if html is None:
                 continue
             title = html.get('title').encode('utf-8')
             # Drop the first 9 characters of the href (presumably the
             # "/song?id=" prefix). A separate name keeps the ``song_id``
             # argument intact for the error log below.
             linked_id = html.get('href')[9:]
             author = link.find('div', 'f-thide s-fc4').find(
                 'span').get('title').encode('utf-8')
             if pysql.single("music163", "song_id", linked_id):
                 self.session.add(
                     pysql.Music163(song_id=linked_id,
                                    song_name=title,
                                    author=author))
                 self.session.flush()
         for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
             play_link = link.get("href").replace("/playlist?id=", "")
             play_name = link.get("title").encode('utf-8')
             if pysql.single("playlist163", "link", play_link):
                 self.session.add(
                     pysql.Playlist163(title=play_name,
                                       link=play_link,
                                       cnt=-1))
                 self.session.flush()
     except Exception:
         # Best effort: discard the partial work and record which song
         # page failed. KeyboardInterrupt/SystemExit are not swallowed.
         self.session.rollback()
         pylog.Log("ERROR 917 : VIEW LINK SONG_ID-" + str(song_id))
예제 #3
0
 def view_capture(self, page):
     """Scrape one page of the playlist listing and store new playlists.

     The listing URL is ``self.__play_url`` followed by ``page * 35``.
     Each ``div.u-cover.u-cover-1`` inside ``ul.m-cvrlst.f-cb`` yields a
     title, a playlist link and a play count; a playlist is inserted and
     committed only when ``pysql.single`` reports true for its link
     (presumably meaning it is not stored yet -- confirm against ``pysql``).

     Args:
         page: Integer page index; multiplied by 35 to build the offset.

     Returns:
         None. On any ``Exception`` the session is rolled back and the
         failure is logged with the page number.
     """
     play_url = self.__play_url + str(page * 35)
     try:
         acmsk = {'class': 'msk'}
         scnb = {'class': 'nb'}
         dcu = {'class': 'u-cover u-cover-1'}
         ucm = {'class': 'm-cvrlst f-cb'}
         # Close the HTTP session once the page has been downloaded.
         with requests.session() as http:
             body = http.get(play_url, headers=self.__headers).content
         soup = BeautifulSoup(body, "html.parser")
         lst = soup.find('ul', ucm)
         for play in lst.find_all('div', dcu):
             anchor = play.find('a', acmsk)
             title = anchor['title'].encode('utf-8')
             link = anchor['href'].encode('utf-8').replace(
                 "/playlist?id=", "")
             # The "万" suffix is expanded to four zeros before int().
             # NOTE(review): a fractional value such as "1.5万" would not
             # survive int(); confirm the site never renders one.
             cnt = play.find('span', scnb).text.encode('utf-8').replace(
                 '万', '0000')
             if pysql.single("playlist163", "link", link):
                 pl = pysql.Playlist163(title=title,
                                        link=link,
                                        cnt=int(cnt))
                 self.session.add(pl)
                 self.session.commit()
     except Exception:
         # A failed flush/commit leaves the session unusable until it is
         # rolled back.
         self.session.rollback()
         # ``page`` is an int, so it must be converted before
         # concatenation; otherwise this handler raises TypeError itself.
         pylog.Log("抓取歌单出现问题,歌单页码:" + str(page))
예제 #4
0
파일: lyric.py 프로젝트: duocang/spider163
 def view_lyric(self, song_id):
     """Download the lyric of a song and store it if it is not stored yet.

     The response body is read through BeautifulSoup's ``.text`` and then
     decoded as JSON; the value at ``['lrc']['lyric']`` is saved as a
     ``Lyric163`` row and committed when ``pysql.single`` reports true
     for the song (presumably meaning no lyric row exists yet -- confirm).

     Args:
         song_id: ID of the song whose lyric is fetched.

     Returns:
         None. On any ``Exception`` the session is rolled back and the
         failure is logged with the song ID.
     """
     url = default.lyric_url.format(str(song_id))
     try:
         # Close the HTTP session once the body has been downloaded.
         with requests.session() as http:
             body = http.get(url, headers=self.__headers).content
         # NOTE(review): the JSON body is passed through an HTML parser
         # first; kept as-is to preserve the existing text handling.
         text = BeautifulSoup(body, "html.parser").text
         lrc = json.loads(text)['lrc']['lyric']
         if pysql.single("lyric163", "song_id", song_id):
             self.session.add(pysql.Lyric163(song_id=song_id, txt=lrc))
             self.session.commit()
     except Exception:
         # A failed commit leaves the session unusable until rolled back.
         self.session.rollback()
         pylog.Log("抓取歌词出现问题,歌曲ID:" + str(song_id))
예제 #5
0
 def view_capture(self, link):
     """Mark a playlist as processed and store the tracks it contains.

     The playlist row is flagged ``over='Y'`` before the tracks are
     fetched, so the flag is set even when the fetch fails. Each track in
     ``['result']['tracks']`` is inserted and committed when
     ``pysql.single`` reports true for its ID (presumably meaning it is
     not stored yet -- confirm); otherwise an "ERROR 103 ... Not Single"
     line is logged.

     Args:
         link: Playlist identifier appended to ``self.__url``.

     Returns:
         None. On any ``Exception`` during the fetch or the inserts, the
         session is rolled back and "Error 901" is logged with the URL.
     """
     self.session.query(pysql.Playlist163).filter(
         pysql.Playlist163.link == link).update({'over': 'Y'})
     # Commit the flag right away. Previously it was only persisted as a
     # side effect of a later song commit, which never happens when the
     # fetch fails or when every track is already stored.
     self.session.commit()
     url = self.__url + str(link)
     try:
         # Close the HTTP session once the body has been downloaded.
         with requests.session() as http:
             body = http.get(url, headers=self.__headers).content
         # NOTE(review): the JSON body is passed through an HTML parser
         # first; kept as-is to preserve the existing text handling.
         text = BeautifulSoup(body, "html.parser").text
         musics = json.loads(text)['result']['tracks']
         for music in musics:
             name = music['name'].encode('utf-8')
             author = music['artists'][0]['name'].encode('utf-8')
             if pysql.single("music163", "song_id", music['id']):
                 self.session.add(
                     pysql.Music163(song_id=music['id'],
                                    song_name=name,
                                    author=author))
                 self.session.commit()
             else:
                 pylog.Log('{} : {} {}'.format("ERROR 103", name,
                                               "Not Single"))
     except Exception:
         # Only the failed insert is discarded; the 'over' flag above is
         # already committed.
         self.session.rollback()
         pylog.Log('{} : {}'.format("Error 901", url))
예제 #6
0
 def curl_playlist(self,playlist_id):
     """Fetch playlist metadata and copy it onto the stored playlist row.

     The ``result`` entry of the API response supplies ``playCount``,
     ``shareCount``, ``commentCount``, ``description`` and the
     comma-joined ``tags`` written to the ``Playlist163`` row whose
     ``link`` equals ``playlist_id``. That update is not committed here.

     Args:
         playlist_id: Playlist identifier formatted into the API URL.

     Returns:
         The ``result`` dict on success. On any ``Exception`` the problem
         is logged, the row is flagged ``done='E'``, the session is
         committed, and ``None`` is returned implicitly.
     """
     endpoint = uapi.playlist_api.format(playlist_id)
     try:
         response = tools.curl(endpoint, self.__headers)
         info = response['result']
         row_query = self.session.query(pysql.Playlist163).filter(
             pysql.Playlist163.link == playlist_id)
         changes = {
             "playCount": info["playCount"],
             "shareCount": info["shareCount"],
             "commentCount": info["commentCount"],
             "description": info["description"],
             "tags": ",".join(info["tags"]),
         }
         row_query.update(changes)
         return info
     except Exception as err:
         message = "抓取歌单页面存在问题:{} 歌单ID:{}".format(err, playlist_id)
         pylog.Log(message)
         # Flag the row as errored and persist that flag.
         failed_row = self.session.query(pysql.Playlist163).filter(
             pysql.Playlist163.link == playlist_id)
         failed_row.update({'done': 'E'})
         self.session.commit()
예제 #7
0
 def auto_view(self, count=1):
     """Run ``views_capture`` on songs whose ``over`` flag is still "N".

     Fewer than 10 songs are handled with a single query. Larger counts
     are split into ``count // 10`` queries of 10 rows followed by one
     query for the remaining ``count % 10`` rows; the table is re-queried
     for every batch (presumably so songs finished by ``views_capture``
     are not selected again -- confirm).

     Args:
         count: Number of songs to process.

     Returns:
         None. Any ``Exception`` is logged as "ERROR 918" and suppressed.
     """
     try:
         if count < 10:
             batch_sizes = [count]
         else:
             # divmod keeps both parts integers on Python 2 and 3;
             # ``range(count / 10)`` raises TypeError on Python 3.
             full_batches, remainder = divmod(count, 10)
             batch_sizes = [10] * full_batches + [remainder]
         for size in batch_sizes:
             msc = self.session.query(pysql.Music163).filter(
                 pysql.Music163.over == "N").limit(size)
             for m in msc:
                 self.views_capture(m.song_id, 1, 1)
     except Exception:
         # Best effort: log and continue. KeyboardInterrupt/SystemExit
         # are no longer swallowed.
         pylog.Log("ERROR 918 : AUTO VIEW")