Example #1
 def view_capture(self, link):
     self.session.query(pysql.Playlist163).filter(
         pysql.Playlist163.link == link).update({'over': 'Y'})
     url = self.__url + str(link)
     s = requests.session()
     songs = []
     try:
         s = BeautifulSoup(
             s.get(url, headers=self.__headers).content, "html.parser")
         musics = json.loads(s.text)['result']['tracks']
         exist = 0
         for music in musics:
             name = tools.encode(music['name'])
             author = tools.encode(music['artists'][0]['name'])
             if pysql.single("music163", "song_id", (music['id'])) is True:
                 self.session.add(
                     pysql.Music163(song_id=music['id'],
                                    song_name=name,
                                    author=author))
                 self.session.commit()
                 exist = exist + 1
                 songs.append({"name": name, "author": author})
             else:
                 pylog.log.info('{} : {} {}'.format(
                     "duplicate song", name, "skipping persistence"))
         print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
             len(musics), exist))
         return songs
     except Exception as e:
         pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
         raise
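The pysql module that these examples lean on is not reproduced here. Judging from the attributes referenced above (song_id, song_name, author, link, cnt, over/done), Music163 and Playlist163 are presumably SQLAlchemy declarative models roughly along the lines below; the column types, lengths, and engine URL are guesses for illustration, not the project's actual schema.

    # Hedged sketch of the ORM layer the examples appear to assume (pysql.*).
    # Column types/lengths and the SQLite engine are illustrative guesses.
    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Music163(Base):
        __tablename__ = "music163"
        song_id = Column(Integer, primary_key=True)
        song_name = Column(String(200))
        author = Column(String(200))
        playTime = Column(Integer, default=0)       # used in Examples #3 and #10
        has_lyric = Column(String(1), default="N")  # used in Examples #5 and #6

    class Playlist163(Base):
        __tablename__ = "playlist163"
        link = Column(String(32), primary_key=True)  # the playlist id string
        title = Column(String(200))
        cnt = Column(Integer)
        dsc = Column(String(200))
        over = Column(String(1), default="N")   # later examples call this column 'done'
        update_time = Column(String(32))

    engine = create_engine("sqlite:///music163.db")  # assumed backend
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()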
Example #2
 def view_capture(self, page):
     s = requests.session()
     play_url = self.__play_url + str(page * 35)
     try:
         acmsk = {'class': 'msk'}
         scnb = {'class': 'nb'}
         dcu = {'class': 'u-cover u-cover-1'}
         ucm = {'class': 'm-cvrlst f-cb'}
         s = BeautifulSoup(
             s.get(play_url, headers=self.__headers).content, "html.parser")
         lst = s.find('ul', ucm)
         for play in lst.find_all('div', dcu):
             title = play.find('a', acmsk)['title'].encode('utf-8')
             link = play.find('a', acmsk)['href'].encode('utf-8').replace(
                 "/playlist?id=", "")
             cnt = play.find('span', scnb).text.encode('utf-8').replace(
                 '万', '0000')
             if pysql.single("playlist163", "link", link) == True:
                 pl = pysql.Playlist163(title=title,
                                        link=link,
                                        cnt=int(cnt))
                 self.session.add(pl)
                 self.session.commit()
     except Exception:
         pylog.Log("Problem crawling playlists, page number: " + str(page))
Example #3
 def view_capture(self, link):
     url = self.__url + str(link)
     songs = []
     try:
         data = self.curl_playlist(link)
         musics = data['tracks']
         exist = 0
         for music in musics:
             name = tools.encode(music['name'])
             author = tools.encode(music['artists'][0]['name'])
             if music["bMusic"] is None:
                 play_time = 0
             else:
                 play_time = music["bMusic"]["playTime"]
             if pysql.single("music163", "song_id", (music['id'])) is True:
                 self.session.add(
                     pysql.Music163(song_id=music['id'], song_name=name,
                                    author=author, playTime=play_time))
                 self.session.commit()
                 exist = exist + 1
                 songs.append({"name": name, "author": author})
             else:
                 pylog.log.info('{} : {} {}'.format(
                     "duplicate song", name, "skipping persistence"))
         print("Playlist contains {} songs, merged {} into the database \r\n".format(
             len(musics), exist))
         self.session.query(pysql.Playlist163).filter(pysql.Playlist163.link == link).update({'done': 'Y'})
         self.session.commit()
         return songs
     except Exception as e:
         pylog.log.error(
             "Problem crawling the playlist page: {} playlist ID: {}".format(e, url))
         # filter by link (the playlist id), not the full URL, so the 'E' flag is actually written
         self.session.query(pysql.Playlist163).filter(pysql.Playlist163.link == link).update({'done': 'E'})
         self.session.commit()
         raise
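tools.encode, used throughout these snippets, is also not included. Since the older variants (Examples #2 and #9) call .encode('utf-8') directly, it is probably just a small normalization helper from the project's Python 2 days; the guess below is illustrative, not the actual implementation.

    # Hedged guess at tools.encode: normalize whatever the JSON/HTML layer returns
    # into plain UTF-8 text before it is persisted.
    def encode(value):
        if isinstance(value, bytes):
            return value.decode("utf-8", "ignore")
        return str(value)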
Example #4
 def view_lyric(self, song_id):
     url = default.lyric_url.format(str(song_id))
     s = requests.session()
     try:
         s = BeautifulSoup(
             s.get(url, headers=self.__headers).content, "html.parser")
         lrc = json.loads(s.text)['lrc']['lyric']
         if pysql.single("lyric163", "song_id", song_id):
             self.session.add(pysql.Lyric163(song_id=song_id, txt=lrc))
             self.session.commit()
     except Exception:
         pylog.Log("Problem crawling lyrics, song ID: " + str(song_id))
Example #5
 def view_lyric(self, song_id):
     url = default.lyric_url.format(str(song_id))
     s = requests.session()
     try:
         s = BeautifulSoup(s.get(url, headers=self.__headers).content, "html.parser")
         lrc = json.loads(s.text)['lrc']['lyric']
         if pysql.single("lyric163", "song_id", song_id):
             self.session.add(pysql.Lyric163(song_id=song_id, txt=lrc))
             self.session.query(pysql.Music163).filter(pysql.Music163.song_id == song_id).update({"has_lyric": "Y"})
             self.session.commit()
     except Exception:
         pylog.log.error("抓取歌词出现问题,歌曲ID:" + str(song_id))
         raise
Example #6
File: lyric.py Project: rex3092/123
 def view_lyric(self, song_id):
     url = uapi.lyric_url.format(str(song_id))
     try:
         data = tools.curl(url, self.__headers)
         lrc = data['lrc']['lyric']
         if pysql.single("lyric163", "song_id", song_id):
             self.session.add(pysql.Lyric163(song_id=song_id, txt=lrc))
             self.session.query(pysql.Music163).filter(pysql.Music163.song_id == song_id).update({"has_lyric": "Y"})
             self.session.commit()
     except Exception as e:
         self.session.query(pysql.Music163).filter(pysql.Music163.song_id == song_id).update({"has_lyric": "E"})
         self.session.commit()
         pylog.log.error("Problem crawling lyrics: {} song ID: {}".format(e, song_id))
Example #7
 def view_links(self, song_id):
     url = "http://music.163.com/song?id=" + str(song_id)
     data = {'id': str(song_id)}
     headers = {
         'Cookie':
         'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
     }
     try:
         req = requests.get(url, headers=headers, data=data, timeout=100)
         sup = BeautifulSoup(req.content, "html.parser")
         for link in sup.find_all('li', class_="f-cb"):
             html = link.find('a', 's-fc1')
             if html is not None:
                 title = tools.encode(html.get('title'))
                 song_id = html.get('href')[9:]
                 author = tools.encode(
                     link.find('div',
                               'f-thide s-fc4').find('span').get('title'))
                 if pysql.single("music163", "song_id", song_id) is True:
                     self.session.add(
                         pysql.Music163(song_id=song_id,
                                        song_name=title,
                                        author=author))
                     self.session.flush()
         for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
             play_link = link.get("href").replace("/playlist?id=", "")
             play_name = tools.encode(link.get("title"))
             if pysql.single("playlist163", "link", play_link) is True:
                 self.session.add(
                     pysql.Playlist163(title=play_name,
                                       link=play_link,
                                       cnt=-1,
                                       dsc="source: hot comments"))
                 self.session.flush()
     except Exception as e:
         pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
Example #8
 def view_capture(self, page, type="全部"):
     play_url = self.__play_url.format(type, page * 35)
     titles = []
     try:
         acmsk = {'class': 'msk'}
         scnb = {'class': 'nb'}
         dcu = {'class': 'u-cover u-cover-1'}
         ucm = {'class': 'm-cvrlst f-cb'}
         data = tools.curl(play_url, self.__headers, type=const.RETURE_HTML)
         lst = data.find('ul', ucm)
         for play in lst.find_all('div', dcu):
             title = tools.encode(play.find('a', acmsk)['title'])
             link = tools.encode(play.find('a', acmsk)['href']).replace("/playlist?id=", "")
             cnt = tools.encode(play.find('span', scnb).text).replace('万', '0000')
             if pysql.single("playlist163","link",link) is True:
                 pl = pysql.Playlist163(title=title, link=link, cnt=int(cnt), dsc="曲风:{}".format(type))
                 self.session.add(pl)
                 self.session.commit()
                 titles.append(title)
         return titles
     except Exception as e:
         pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(e, type, page))
         raise
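tools.curl (Examples #6 and #8) is another helper that is not shown. From the two call sites it returns parsed JSON by default and a BeautifulSoup document when called with type=const.RETURE_HTML, so a sketch under those assumptions looks like this; the constant values and timeout are invented for illustration.

    # Hedged sketch of tools.curl: a JSON dict by default, parsed HTML when the
    # caller passes const.RETURE_HTML.
    import json
    import requests
    from bs4 import BeautifulSoup

    RETURE_JSON = 0  # constant names copied from the call sites, values assumed
    RETURE_HTML = 1

    def curl(url, headers, type=RETURE_JSON, timeout=30):
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()
        if type == RETURE_HTML:
            return BeautifulSoup(resp.content, "html.parser")
        return json.loads(resp.text)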
Example #9
 def view_capture(self, link):
     self.session.query(pysql.Playlist163).filter(
         pysql.Playlist163.link == link).update({'over': 'Y'})
     url = self.__url + str(link)
     s = requests.session()
     try:
         s = BeautifulSoup(
             s.get(url, headers=self.__headers).content, "html.parser")
         musics = json.loads(s.text)['result']['tracks']
         for music in musics:
             name = music['name'].encode('utf-8')
             author = music['artists'][0]['name'].encode('utf-8')
             if pysql.single("music163", "song_id", (music['id'])) == True:
                 self.session.add(
                     pysql.Music163(song_id=music['id'],
                                    song_name=name,
                                    author=author))
                 self.session.commit()
             else:
                 pylog.Log('{} : {} {}'.format("ERROR 103", name,
                                               "Not Single"))
     except Exception:
         pylog.Log('{} : {}'.format("Error 901", url))
Example #10
    def view_capture(self, link):
        url = self.__url + str(link)
        songs = []
        try:
            data = self.curl_playlist(link)
            musics = data['tracks']
            exist = 0
            for music in musics:
                name = tools.encode(music['name'])
                authors = []
                for art in music['artists']:
                    authors.append(tools.encode(art['name']))
                if music["bMusic"] is None:
                    play_time = 0
                else:
                    play_time = music["bMusic"]["playTime"]
                if pysql.single("music163", "song_id", (music['id'])) is True:
                    self.session.add(
                        pysql.Music163(song_id=music['id'],
                                       song_name=name,
                                       author=",".join(authors),
                                       playTime=play_time))
                    self.session.commit()
                    exist = exist + 1
                    songs.append({"name": name, "author": ",".join(authors)})
                else:
                    pylog.log.info('{} : {} {}'.format(
                        "duplicate song", name, "skipping persistence"))
                # handle the official charts (top lists)
                if int(link) in uapi.top.keys():
                    updateTime = datetime.datetime.fromtimestamp(
                        data['updateTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                    createTime = datetime.datetime.fromtimestamp(
                        data['createTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                    position = music['position']
                    lastrank = 100000000
                    with tools.ignored(Exception):
                        lastrank = music['lastRank']
                    cnt = self.session.query(pysql.Toplist163).filter(
                        pysql.Toplist163.update_time == updateTime,
                        pysql.Toplist163.song_id == music['id'],
                        pysql.Toplist163.playlist_id == link).count()
                    mcnt = self.session.query(pysql.Toplist163).filter(
                        pysql.Toplist163.mailed == "Y",
                        pysql.Toplist163.song_id == music['id'],
                        pysql.Toplist163.playlist_id == link).count()
                    if cnt == 0:
                        mailed = "N"
                        if mcnt > 0:
                            mailed = "Y"
                        self.session.add(
                            pysql.Toplist163(song_id=music['id'],
                                             song_name=name,
                                             author=",".join(authors),
                                             playTime=play_time,
                                             position=position,
                                             playlist_id=link,
                                             lastRank=lastrank,
                                             mailed=mailed,
                                             create_time=createTime,
                                             update_time=updateTime))
                        self.session.commit()

            print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
                len(musics), exist))
            self.session.query(pysql.Playlist163).filter(
                pysql.Playlist163.link == link).update({
                    'done': 'Y',
                    'update_time': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                })
            self.session.commit()
            return songs
        except Exception as e:
            pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
            self.session.query(pysql.Playlist163).filter(
                pysql.Playlist163.link == url).update({
                    'done':
                    'E',
                    'update_time':
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%S:%M")
                })
            self.session.commit()
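Finally, the tools.ignored(Exception) block in Example #10 reads like the standard context-manager suppression idiom: music['lastRank'] is missing for songs that are new to a chart, so the code falls back to the sentinel value. A minimal sketch, assuming that is all the helper does:

    # Hedged sketch: swallow the listed exception types, like contextlib.suppress.
    from contextlib import contextmanager

    @contextmanager
    def ignored(*exceptions):
        try:
            yield
        except exceptions:
            pass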