Esempio n. 1
0
 def view_capture(self, page, type="全部"):
     """Scrape one listing page of playlists for a style and persist them.

     Args:
         page: zero-based page index; each page holds 35 playlists.
         type: playlist style/category substituted into the listing URL
             (shadows the builtin, kept for caller compatibility).

     Raises:
         Re-raises any exception after logging it.
     """
     play_url = self.__play_url.format(type, page * 35)
     try:
         acmsk = {'class': 'msk'}
         scnb = {'class': 'nb'}
         dcu = {'class': 'u-cover u-cover-1'}
         ucm = {'class': 'm-cvrlst f-cb'}
         # Distinct names for the HTTP session and the parsed soup: the
         # original bound both to `s`, shadowing the session and leaking
         # its connection pool. The `with` block closes it deterministically.
         with requests.session() as http:
             soup = BeautifulSoup(
                 http.get(play_url, headers=self.__headers).content,
                 "html.parser")
         lst = soup.find('ul', ucm)
         for play in lst.find_all('div', dcu):
             title = play.find('a', acmsk)['title'].encode('utf-8')
             # Strip the route prefix so only the numeric playlist id remains.
             link = play.find('a', acmsk)['href'].encode('utf-8').replace(
                 "/playlist?id=", "")
             # Expand the Chinese "万" (ten-thousand) suffix into digits so
             # the count parses as an int.
             cnt = play.find('span', scnb).text.encode('utf-8').replace(
                 '万', '0000')
             # pysql.single returns True when the link is not yet stored —
             # TODO confirm against pysql's definition.
             if pysql.single("playlist163", "link", link) is True:
                 pl = pysql.Playlist163(title=title,
                                        link=link,
                                        cnt=int(cnt),
                                        dsc="曲风:{}".format(type))
                 self.session.add(pl)
                 self.session.commit()
     except Exception as e:
         pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(e, type, page))
         raise
Esempio n. 2
0
 def view_links(self, song_id):
     """Parse the song page for *song_id*; store related songs and playlists.

     Fetches http://music.163.com/song?id=<song_id>, extracts linked songs
     ('a.s-fc1' inside 'li.f-cb') and recommended playlists
     ('a.sname f-fs1 s-fc0'), and flushes new rows into the session.
     Failures roll the session back and are logged, not re-raised.
     """
     url = "http://music.163.com/song?id=" + str(song_id)
     # NOTE(review): a body on a GET request is unusual; the id is already
     # in the query string — confirm the server actually needs `data`.
     data = {'id': str(song_id)}
     headers = {
         'Cookie':
         'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'
     }
     try:
         req = requests.get(url, headers=headers, data=data, timeout=100)
         sup = BeautifulSoup(req.content, "html.parser")
         for link in sup.find_all('li', class_="f-cb"):
             html = link.find('a', 's-fc1')
             # identity check, not `!= None` (PEP 8)
             if html is not None:
                 title = html.get('title').encode('utf-8')
                 # href looks like "/song?id=NNN"; slice off the prefix.
                 song_id = html.get('href')[9:]
                 author = link.find('div', 'f-thide s-fc4').find(
                     'span').get('title').encode('utf-8')
                 # `is True`, not `== True` — pysql.single signals
                 # "not yet stored" with the True singleton.
                 if pysql.single("music163", "song_id", song_id) is True:
                     self.session.add(
                         pysql.Music163(song_id=song_id,
                                        song_name=title,
                                        author=author))
                     self.session.flush()
         for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
             play_link = link.get("href").replace("/playlist?id=", "")
             play_name = link.get("title").encode('utf-8')
             if pysql.single("playlist163", "link", play_link) is True:
                 self.session.add(
                     pysql.Playlist163(title=play_name,
                                       link=play_link,
                                       cnt=-1))
                 self.session.flush()
     except Exception:
         # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
         # still propagate; discard uncommitted rows before logging.
         self.session.rollback()
         pylog.Log("ERROR 917 : VIEW LINK SONG_ID-" + str(song_id))
Esempio n. 3
0
 def view_links(self, song_id):
     """Parse the song page for *song_id*; store related songs and playlists.

     Fetches http://music.163.com/song?id=<song_id>, extracts linked songs
     ('a.s-fc1' inside 'li.f-cb') and recommended playlists
     ('a.sname f-fs1 s-fc0'), flushing new rows into the session.
     Failures roll the session back and are logged, not re-raised.
     """
     url = "http://music.163.com/song?id=" + str(song_id)
     # NOTE(review): a body on a GET request is unusual; the id is already
     # in the query string — confirm the server actually needs `data`.
     data = {'id': str(song_id)}
     headers = {
         'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
     }
     try:
         req = requests.get(url, headers=headers, data=data, timeout=100)
         sup = BeautifulSoup(req.content, "html.parser")
         for link in sup.find_all('li', class_="f-cb"):
             html = link.find('a', 's-fc1')
             if html is not None:
                 title = tools.encode(html.get('title'))
                 # href looks like "/song?id=NNN"; slice off the prefix.
                 song_id = html.get('href')[9:]
                 author = tools.encode(link.find(
                     'div', 'f-thide s-fc4'
                 ).find('span').get('title'))
                 # pysql.single is True when the row is not yet stored —
                 # TODO confirm against pysql's definition.
                 if pysql.single("music163", "song_id", song_id) is True:
                     self.session.add(pysql.Music163(
                         song_id=song_id, song_name=title, author=author
                     ))
                     self.session.flush()
         for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
             play_link = link.get("href").replace("/playlist?id=", "")
             play_name = tools.encode(link.get("title"))
             if pysql.single("playlist163", "link", play_link) is True:
                 self.session.add(pysql.Playlist163(
                     title=play_name, link=play_link, cnt=-1,
                     dsc="来源:热评"
                 ))
                 self.session.flush()
     except Exception as e:
         # Roll back so flushed-but-uncommitted rows don't leak into the
         # next use of the session (the sibling view_links does this too).
         self.session.rollback()
         pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
Esempio n. 4
0
 def view_capture(self, page, type="全部"):
     """Fetch one listing page of playlists for a style and persist them.

     Args:
         page: zero-based page index; each page holds 35 playlists.
         type: playlist style/category substituted into the listing URL.

     Returns:
         The titles of the playlists newly stored on this page.

     Raises:
         Re-raises any exception after logging it.
     """
     page_url = self.__play_url.format(type, page * 35)
     captured = []
     try:
         anchor_sel = {'class': 'msk'}
         count_sel = {'class': 'nb'}
         cover_sel = {'class': 'u-cover u-cover-1'}
         list_sel = {'class': 'm-cvrlst f-cb'}
         document = tools.curl(page_url, self.__headers, type=const.RETURE_HTML)
         container = document.find('ul', list_sel)
         for item in container.find_all('div', cover_sel):
             anchor = item.find('a', anchor_sel)
             name = tools.encode(anchor['title'])
             # Keep only the numeric playlist id from the href.
             pid = tools.encode(anchor['href']).replace("/playlist?id=", "")
             # Expand the "万" (ten-thousand) suffix so the count parses.
             plays = tools.encode(item.find('span', count_sel).text)
             plays = plays.replace('万', '0000')
             if pysql.single("playlist163", "link", pid) is True:
                 record = pysql.Playlist163(title=name,
                                            link=pid,
                                            cnt=int(plays),
                                            dsc="曲风:{}".format(type))
                 self.session.add(record)
                 self.session.commit()
                 captured.append(name)
         return captured
     except Exception as e:
         pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(e, type, page))
         raise