def view_down(self, playlist_id, path="."):
    """Download every track of a playlist as an MP3 file into *path*.

    Prints per-track progress, a success/failure summary, and an ASCII
    table of the tracks whose download link could not be resolved.

    :param playlist_id: playlist id; passed to get_playlist as a string.
    :param path: target directory for the downloaded files.
    """
    # FIX: renamed local from `list` — never shadow the builtin.
    playlist = self.get_playlist(str(playlist_id))
    msg = {"success": 0, "failed": 0, "failed_list": []}
    for music in playlist['tracks']:
        pylog.print_info(
            "正在下载歌曲 {}-{}.mp3".format(
                tools.encode(music['name']),
                tools.encode(music['artists'][0]['name'])
            )
        )
        link = self.get_mp3_link(music["id"])
        if link is None:
            # No playable link: remember the track for the failure table.
            msg["failed"] += 1
            msg["failed_list"].append(music)
            continue
        r = requests.get(link)
        # "/" in a title would otherwise be treated as a path separator.
        file_name = "{}/{}-{}{}".format(
            path,
            tools.encode(music['name']).replace("/", "-"),
            tools.encode(music['artists'][0]['name']).replace("/", "-"),
            ".mp3"
        )
        with open(file_name, "wb") as code:
            code.write(r.content)
        msg["success"] += 1
    pylog.print_warn(
        "下载成功:{} 首,下载失败:{}首".format(msg["success"], msg["failed"])
    )
    tb = [["歌曲名字", "艺术家", "ID"]]
    for music in msg["failed_list"]:
        tb.append([
            music['name'].encode("utf-8"),
            music['artists'][0]['name'].encode("utf-8"),
            music['id'],
        ])
    print(AsciiTable(tb).table)
def view_lyrics(self, count):
    """Fetch lyrics for up to *count* songs that have none yet.

    Works in batches of 10. Each batch re-runs the ``has_lyric == 'N'``
    query, so songs handled by view_lyric drop out of the next batch.

    :param count: maximum number of songs to process.
    :returns: list of dicts with name/author/comment of processed songs.
    """
    song = []

    def _fetch_batch(limit):
        # One batch: query up to `limit` lyric-less songs and fetch each.
        # (Deduplicated — the original repeated this block verbatim.)
        ms = self.session.query(pysql.Music163).filter(
            pysql.Music163.has_lyric == "N").limit(limit)
        for m in ms:
            print("正在抓取歌词 ID {} 歌曲 {}".format(
                m.song_id, pylog.Blue(tools.encode(m.song_name))))
            self.view_lyric(m.song_id)
            song.append({
                "name": m.song_name,
                "author": m.author,
                "comment": m.comment
            })

    for _ in range(int(count / 10)):
        _fetch_batch(10)
    # Remainder batch; limit(0) yields nothing, matching the original.
    _fetch_batch(count % 10)
    return song
def view_capture(self, link):
    """Crawl one playlist and persist its tracks into music163.

    Marks the playlist row 'done'='Y' on success, 'E' on failure
    (re-raising the exception).

    :param link: playlist id as stored in Playlist163.link.
    :returns: list of {"name", "author"} dicts for newly stored songs.
    """
    url = self.__url + str(link)
    songs = []
    try:
        data = self.curl_playlist(link)
        musics = data['tracks']
        exist = 0
        for music in musics:
            name = tools.encode(music['name'])
            author = tools.encode(music['artists'][0]['name'])
            # Some tracks carry no bMusic record; fall back to 0.
            if music["bMusic"] is None:
                play_time = 0
            else:
                play_time = music["bMusic"]["playTime"]
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(pysql.Music163(
                    song_id=music['id'], song_name=name,
                    author=author, playTime=play_time))
                self.session.commit()
                exist = exist + 1
                songs.append({"name": name, "author": author})
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name,
                                                   "取消持久化"))
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
            len(musics), exist))
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({'done': 'Y'})
        self.session.commit()
        return songs
    except Exception as e:
        pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
        # BUG FIX: filter on `link` (what the table stores), not the full
        # URL — the old `link == url` comparison never matched any row,
        # so the 'E' state was never persisted.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({'done': 'E'})
        self.session.commit()
        raise
def view_links(self, song_id):
    """Scrape the song page of *song_id* for related content.

    Persists similar songs into music163 and recommended playlists
    (tagged "来源:热评") into playlist163. Rows are flushed, not
    committed. Errors are logged and swallowed.

    :param song_id: id of the song whose page is scraped.
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
    }
    try:
        req = requests.get(url, headers=headers, data=data, timeout=100)
        sup = BeautifulSoup(req.content, "html.parser")
        # Similar-song list items.
        for item in sup.find_all('li', class_="f-cb"):
            anchor = item.find('a', 's-fc1')
            if anchor is not None:
                title = tools.encode(anchor.get('title'))
                # BUG FIX: bind the scraped id to a new name instead of
                # clobbering the `song_id` parameter, so the error log
                # below reports the id this call was asked to scrape.
                similar_id = anchor.get('href')[9:]  # strip "/song?id="
                author = tools.encode(item.find(
                    'div', 'f-thide s-fc4'
                ).find('span').get('title'))
                if pysql.single("music163", "song_id", similar_id) is True:
                    self.session.add(pysql.Music163(
                        song_id=similar_id, song_name=title, author=author
                    ))
                    self.session.flush()
        # Recommended playlists shown on the page.
        for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = link.get("href").replace("/playlist?id=", "")
            play_name = tools.encode(link.get("title"))
            if pysql.single("playlist163", "link", play_link) is True:
                self.session.add(pysql.Playlist163(
                    title=play_name, link=play_link, cnt=-1, dsc="来源:热评"
                ))
                self.session.flush()
    except Exception as e:
        pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
def view_capture(self, link):
    """Fetch one playlist page and persist any tracks not seen before.

    Marks the playlist row 'over'='Y' up front, then stores each new
    track into music163. Returns the list of newly stored songs;
    re-raises after logging on any failure.
    """
    self.session.query(pysql.Playlist163).filter(
        pysql.Playlist163.link == link).update({'over': 'Y'})
    url = self.__url + str(link)
    http = requests.session()
    songs = []
    try:
        soup = BeautifulSoup(
            http.get(url, headers=self.__headers).content, "html.parser")
        tracks = json.loads(soup.text)['result']['tracks']
        merged = 0
        for track in tracks:
            name = tools.encode(track['name'])
            author = tools.encode(track['artists'][0]['name'])
            if pysql.single("music163", "song_id", (track['id'])) is True:
                self.session.add(
                    pysql.Music163(song_id=track['id'], song_name=name,
                                   author=author))
                self.session.commit()
                merged += 1
                songs.append({"name": name, "author": author})
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name,
                                                   "取消持久化"))
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
            len(tracks), merged))
        return songs
    except Exception as e:
        pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
        raise
def view_capture(self, song_id, page=1):
    """Crawl one page of comments for *song_id* and persist the hot ones.

    Page 1 first clears previously stored comments. Regular comments
    with more than 30 likes are kept; on page 1 every hot comment is
    kept as well. Updates the song's 'done' flag, comment count and
    update_time (plus the top-list row when crawling official charts).

    :returns: estimated page count (total comments / 20).
    :raises: re-raises any failure after marking the song 'E'/-2.
    """
    if page == 1:
        # Restart: drop comments left over from a previous crawl.
        self.session.query(pysql.Comment163).filter(
            pysql.Comment163.song_id == song_id).delete()
        self.session.commit()
    try:
        data = self.post(song_id, page)
        for comment in data['comments']:
            if comment['likedCount'] > 30:
                txt = tools.encode(comment['content'])
                author = tools.encode(comment['user']['nickname'])
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        if page == 1:
            for comment in data['hotComments']:
                txt = tools.encode(comment['content'])
                author = tools.encode(comment['user']['nickname'])
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        cnt = int(data['total'])
        self.session.query(pysql.Music163).filter(
            pysql.Music163.song_id == song_id).update({
                'done': 'Y',
                'comment': cnt,
                # BUG FIX: format was "%Y-%m-%d %H:%S:%M"
                # (hour:second:minute) — stored garbled timestamps.
                'update_time':
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })
        if self.music_type == self.Official:
            self.session.query(pysql.Toplist163).filter(
                pysql.Toplist163.song_id == song_id).update({
                    'done': 'Y',
                    'comment': cnt
                })
        self.session.commit()
        return cnt / 20
    except Exception as e:
        self.session.rollback()
        # comment = -2 flags a failed crawl for this song.
        self.session.query(pysql.Music163).filter(
            pysql.Music163.song_id == song_id).update({
                'done': 'E',
                'comment': -2
            })
        self.session.commit()
        pylog.log.error("解析歌曲评论的时候出现问题:{} 歌曲ID:{} 页码:{}".format(
            e, song_id, page))
        raise
def auto_view(self, count=1):
    """Crawl hot comments for up to *count* songs still marked 'N'.

    Failures on a single song are rolled back and logged without
    aborting the rest of the batch.

    :param count: number of songs to process; counts >= 10 are handled
        in batches of 10 plus a remainder batch.
    :returns: list of dicts describing the processed songs.
    """
    song = []

    def _crawl(batch):
        # Process one query batch. (Deduplicated — the original
        # repeated this loop three times verbatim.)
        for m in batch:
            try:
                print("抓取热评 ID {} 歌曲 {}".format(
                    m.song_id, pylog.Blue(tools.encode(m.song_name))))
                self.views_capture(m.song_id, 1, 1)
                song.append({
                    "name": m.song_name,
                    "author": m.author,
                    "song_id": m.song_id
                })
            except Exception as e:
                self.session.rollback()
                pylog.log.error("自动抓取热评出现异常:{} 歌曲ID:{}".format(
                    e, m.song_id))

    if count < 10:
        _crawl(self.session.query(pysql.Music163).filter(
            pysql.Music163.done == "N").order_by(
            pysql.Music163.id).limit(count))
    else:
        for _ in range(int(count / 10)):
            # NOTE: the original batch query has no order_by here;
            # kept as-is to preserve behavior.
            _crawl(self.session.query(pysql.Music163).filter(
                pysql.Music163.done == "N").limit(10))
        _crawl(self.session.query(pysql.Music163).filter(
            pysql.Music163.done == "N").order_by(
            pysql.Music163.id).limit(count % 10))
    return song
def view_capture(self, song_id, page=1):
    """Crawl one page of comments for *song_id* via the encrypted API.

    Page 1 first clears previously stored comments. Regular comments
    with more than 30 likes are kept; on page 1 every hot comment is
    kept as well. Updates the song's 'over' flag and comment count.

    :returns: estimated page count (total comments / 20).
    :raises: re-raises any failure after marking comment = -2.
    """
    if page == 1:
        # Restart: drop comments left over from a previous crawl.
        self.session.query(pysql.Comment163).filter(
            pysql.Comment163.song_id == song_id).delete()
        self.session.commit()
    data = {
        'params': self.createParams(page),
        'encSecKey': self.__encSecKey
    }
    url = uapi.comment_url.format(song_id)
    try:
        req = requests.post(url, headers=self.__headers, data=data,
                            timeout=10)
        # FIX: parse the response body once instead of re-calling
        # req.json() (a full JSON re-parse) four separate times.
        payload = req.json()
        for comment in payload['comments']:
            if comment['likedCount'] > 30:
                txt = tools.encode(comment['content'])
                author = tools.encode(comment['user']['nickname'])
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        if page == 1:
            for comment in payload['hotComments']:
                txt = tools.encode(comment['content'])
                author = tools.encode(comment['user']['nickname'])
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        cnt = int(payload['total'])
        self.session.query(pysql.Music163).filter(
            pysql.Music163.song_id == song_id).update({
                'over': 'Y',
                'comment': cnt
            })
        self.session.commit()
        return cnt / 20
    except Exception as e:
        self.session.rollback()
        # NOTE(review): the error path also sets 'over': 'Y' (with
        # comment = -2 as the failure marker) — possibly intended to be
        # a distinct error state; preserved as-is, confirm intent.
        self.session.query(pysql.Music163).filter(
            pysql.Music163.song_id == song_id).update({
                'over': 'Y',
                'comment': -2
            })
        self.session.commit()
        pylog.log.error("解析歌曲评论的时候出现问题:{} 歌曲ID:{} 页码:{}".format(
            e, song_id, page))
        raise
def views_capture(self, source=None):
    """Crawl pending playlists (done == 'N').

    With no *source*: the 10 lowest-id pending playlists. With a genre
    *source* (the "曲风:" prefix is added when missing): one matching
    pending playlist.

    :returns: dict mapping playlist title -> list of crawled songs.
    """
    playlist = {}
    if source is None:
        urls = self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.done == 'N').order_by(
            pysql.Playlist163.id).limit(10)
    else:
        if source.startswith("曲风:") is False:
            source = "曲风:" + source
        # BUG FIX: Query has no .order() method in SQLAlchemy — the
        # original raised AttributeError on this branch; it must be
        # .order_by().
        urls = self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.done == 'N',
            pysql.Playlist163.dsc == source).order_by(
            pysql.Playlist163.id).limit(1)
    for url in urls:
        print("正在抓取歌单《{}》的歌曲……".format(tools.encode(url.title)))
        songs = self.view_capture(url.link)
        playlist[tools.encode(url.title)] = songs
    return playlist
def get_playlist(self, playlist_id):
    """Print a playlist's metadata and a table of its tracks.

    Triggers a crawl of the playlist first, then fetches the playlist
    JSON over HTTP and renders it.

    :param playlist_id: numeric playlist id.
    """
    self.view_capture(int(playlist_id))
    url = uapi.playlist_api.format(playlist_id)
    # FIX: distinct names — the original reused `s` for the HTTP
    # session, the soup, AND the artist loop variable.
    http = requests.session()
    soup = BeautifulSoup(
        http.get(url, headers=self.__headers).content, "html.parser")
    playlist = json.loads(soup.text)['result']
    print("《" + tools.encode(playlist['name']) + "》")
    author = tools.encode(playlist['creator']['nickname'])
    pc = str(playlist['playCount'])
    sc = str(playlist['subscribedCount'])
    rc = str(playlist['shareCount'])
    cc = str(playlist['commentCount'])
    # Best-effort print: some metadata fields may be missing/None.
    with tools.ignored(Exception):
        print("维护者:{} 播放:{} 关注:{} 分享:{} 评论:{}".format(
            author, pc, sc, rc, cc))
        print("描述:{}".format(tools.encode(playlist['description'])))
        print("标签:{}".format(",".join(tools.encode(playlist['tags']))))
    tb = [["ID", "歌曲名字", "艺术家", "唱片"]]
    for music in playlist['tracks']:
        artists = [artist['name'] for artist in music['artists']]
        ms = tools.encode(music['name'])
        ar = tools.encode(",".join(artists))
        ab = tools.encode(music['album']['name'])
        # FIX: renamed from `id` — don't shadow the builtin.
        song_id = music['id']
        tb.append([song_id, ms, ar, ab])
    print(AsciiTable(tb).table)
def get_playlist(self, playlist_id):
    """Print a playlist's metadata and a table of its tracks.

    Triggers a crawl of the playlist first, then renders the data
    returned by curl_playlist.

    :param playlist_id: numeric playlist id.
    """
    self.view_capture(int(playlist_id))
    playlist = self.curl_playlist(playlist_id)
    print("《" + tools.encode(playlist['name']) + "》")
    author = tools.encode(playlist['creator']['nickname'])
    pc = str(playlist['playCount'])
    sc = str(playlist['subscribedCount'])
    rc = str(playlist['shareCount'])
    cc = str(playlist['commentCount'])
    # Best-effort print: some metadata fields may be missing/None.
    with tools.ignored(Exception):
        print("维护者:{} 播放:{} 关注:{} 分享:{} 评论:{}".format(
            author, pc, sc, rc, cc))
        print("描述:{}".format(tools.encode(playlist['description'])))
        print("标签:{}".format(",".join(tools.encode(playlist['tags']))))
    tb = [["ID", "歌曲名字", "艺术家", "唱片"]]
    for music in playlist['tracks']:
        # FIX: loop variable renamed from `s`; row id renamed from `id`
        # (builtin shadowing).
        artists = [artist['name'] for artist in music['artists']]
        ms = tools.encode(music['name'])
        ar = tools.encode(",".join(artists))
        ab = tools.encode(music['album']['name'])
        song_id = music['id']
        tb.append([song_id, ms, ar, ab])
    print(AsciiTable(tb).table)
def searchPlaylist(key):
    """Search NetEase for playlists matching *key* and print a table.

    :param key: search keyword. Prints a warning and returns None when
        no playlist matches.
    """
    url = uapi.search_api
    data = {'s': key, 'offset': 0, 'limit': 5, 'type': "1000"}
    req = requests.post(url, headers=uapi.header, data=data, timeout=10)
    # FIX: parse the response body once instead of calling req.json()
    # (a full JSON re-parse) twice.
    result = req.json()["result"]
    if result['playlistCount'] == 0:
        pylog.log.warn("关键词 {} 没有可搜索歌单".format(key))
        return
    song_table = AsciiTable([["ID", "歌单", "维护者", "播放数量", "收藏数量"]])
    for item in result['playlists']:
        song_table.table_data.append([
            str(item['id']),  # FIX: no longer bound to builtin name `id`
            tools.encode(item['name']),
            tools.encode(item['creator']['nickname']),
            str(item['playCount']),
            str(item['bookCount']),
        ])
    print(pylog.Blue("与 \"{}\" 有关的歌单".format(key)))
    print(song_table.table)
def searchSong(key):
    """Search NetEase for songs matching *key* and print a table.

    :param key: search keyword. Prints a warning and returns None when
        no song matches.
    """
    url = uapi.search_api
    data = {'s': key, 'offset': 0, 'limit': 20, 'type': "1"}
    req = requests.post(url, headers=uapi.header, data=data, timeout=10)
    # FIX: parse the response body once instead of calling req.json()
    # (a full JSON re-parse) twice.
    result = req.json()["result"]
    if result['songCount'] == 0:
        pylog.log.warn("关键词 {} 没有可搜索歌曲".format(key))
        return
    song_table = AsciiTable([["ID", "歌曲", "专辑", "演唱"]])
    for item in result['songs']:
        artist = [tools.encode(a['name']) for a in item['artists']]
        song_table.table_data.append([
            str(item['id']),
            tools.encode(item['name']),
            tools.encode(item['album']['name']),
            ",".join(artist),
        ])
    print(pylog.Blue("与 \"{}\" 有关的歌曲".format(key)))
    print(song_table.table)
def searchAlbum(key):
    """Search NetEase for albums matching *key* and print a table.

    :param key: search keyword. Prints a warning and returns None when
        no album matches.
    """
    url = uapi.search_api
    data = {'s': key, 'offset': 0, 'limit': 20, 'type': "10"}
    req = requests.post(url, headers=uapi.header, data=data, timeout=10)
    # FIX: parse the response body once instead of calling req.json()
    # (a full JSON re-parse) twice.
    result = req.json()["result"]
    if result['albumCount'] == 0:
        pylog.log.warn("关键词 {} 没有可搜索专辑".format(key))
        return
    song_table = AsciiTable([["ID", "专辑", "演唱", "发行方"]])
    for item in result['albums']:
        company = ""
        # FIX: compare to None with `is not None`, not `!= None`.
        if item['company'] is not None:
            company = tools.encode(item['company'])
        artist = [tools.encode(a['name']) for a in item['artists']]
        song_table.table_data.append(
            [str(item['id']), tools.encode(item['name']),
             ",".join(artist), company])
    print(pylog.Blue("与 \"{}\" 有关的专辑".format(key)))
    print(song_table.table)
def auto_view(self, count=1):
    """Crawl hot comments for up to *count* pending songs.

    Regular songs (music_type == Common) come from music163 with one
    comment page; official chart songs (music_type == Official) come
    from toplist163 with every page crawled. Per-song failures are
    logged without aborting the batch.

    :returns: list of dicts describing the processed songs.
    """
    song = []

    def _crawl(batch, progress_fmt, error_fmt, max_page):
        # Shared crawl loop. (Deduplicated — the original repeated it
        # for both branches with only the strings and max_page varying.)
        for m in batch:
            try:
                print(progress_fmt.format(
                    m.song_id, pylog.Blue(tools.encode(m.song_name))))
                self.views_capture(m.song_id, 1, max_page)
                song.append({
                    "name": m.song_name,
                    "author": m.author,
                    "song_id": m.song_id
                })
            except Exception as e:
                pylog.log.error(error_fmt.format(e, m.song_id))

    if self.music_type == self.Common:
        batch = self.session.query(pysql.Music163).filter(
            pysql.Music163.done == "N").order_by(
            pysql.Music163.id).limit(count)
        _crawl(batch, "抓取热评 ID {} 歌曲 {}",
               "自动抓取热评出现异常:{} 歌曲ID:{}", 1)
    elif self.music_type == self.Official:
        batch = self.session.query(pysql.Toplist163).filter(
            pysql.Toplist163.done == "N").order_by(
            pysql.Toplist163.id).limit(count)
        # max_page = 2 means every page of comments is crawled.
        _crawl(batch, "抓取官方榜单歌曲热评 ID {} 歌曲 {}",
               "自动抓取官方榜单热评出现异常:{} 歌曲ID:{}", 2)
    return song
def view_capture(self, page, type="全部"):
    """Scrape one catalogue page of playlists for the given genre.

    New playlists are persisted into playlist163 tagged with the genre.
    Returns the titles that were stored; logs and re-raises on failure.
    """
    play_url = self.__play_url.format(type, page * 35)
    titles = []
    try:
        data = tools.curl(play_url, self.__headers, type=const.RETURE_HTML)
        mask_attrs = {'class': 'msk'}
        count_attrs = {'class': 'nb'}
        cover_attrs = {'class': 'u-cover u-cover-1'}
        list_attrs = {'class': 'm-cvrlst f-cb'}
        container = data.find('ul', list_attrs)
        for cover in container.find_all('div', cover_attrs):
            anchor = cover.find('a', mask_attrs)
            title = tools.encode(anchor['title'])
            link = tools.encode(anchor['href']).replace("/playlist?id=", "")
            # Play counts like "12万" become "120000" before int().
            cnt = tools.encode(
                cover.find('span', count_attrs).text).replace('万', '0000')
            if pysql.single("playlist163", "link", link) is True:
                self.session.add(pysql.Playlist163(
                    title=title, link=link, cnt=int(cnt),
                    dsc="曲风:{}".format(type)))
                self.session.commit()
                titles.append(title)
        return titles
    except Exception as e:
        pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(
            e, type, page))
        raise
def searchSinger(key):
    """Search NetEase for artists matching *key* and print a table.

    :param key: search keyword. Prints a warning and returns None when
        no artist matches.
    """
    url = uapi.search_api
    data = {'s': key, 'offset': 0, 'limit': 10, 'type': "100"}
    req = requests.post(url, headers=uapi.header, data=data, timeout=10)
    # FIX: parse the response body once instead of calling req.json()
    # (a full JSON re-parse) twice.
    result = req.json()["result"]
    if result['artistCount'] == 0:
        pylog.log.warn("关键词 {} 没有可搜索艺术家".format(key))
        return
    song_table = AsciiTable([["ID", "姓名", "专辑数量", "MV数量"]])
    for item in result['artists']:
        song_table.table_data.append([
            str(item['id']),  # FIX: no longer bound to builtin name `id`
            tools.encode(item['name']),
            str(item['albumSize']),
            str(item['mvSize']),
        ])
    print(pylog.Blue("与 \"{}\" 有关的歌手".format(key)))
    print(song_table.table)
def get_music(self, music_id):
    """Print a song's details plus its stored comments as a table.

    Crawls page 1 of the song's comments first, fetches song metadata
    from the music API, then renders the comments persisted in
    comment163. Falls back to a plain (non-table) dump when the
    terminal encoding cannot render the table.

    :param music_id: numeric song id.
    """
    self.view_capture(int(music_id), 1)
    url = uapi.music_api.format(music_id, music_id)
    data = tools.curl(url, self.__headers)
    music = data['songs']
    print("《" + tools.encode(music[0]['name']) + "》")
    author = []
    for a in music[0]['artists']:
        author.append(tools.encode(a['name']))
    album = str(tools.encode(music[0]['album']['name']))
    print("演唱:{} 专辑:{}".format(",".join(author), album))
    comments = self.session.query(pysql.Comment163).filter(
        pysql.Comment163.song_id == int(music_id)
    )
    tb = AsciiTable([["序号", "作者", "评论", "点赞"]])
    # Largest multiple of 3 not exceeding the comment column's width.
    max_width = tb.column_max_width(2) - tb.column_max_width(2) % 3
    cnt = 0
    try:
        for cmt in comments:
            cnt = cnt + 1
            au = tools.encode(cmt.author)
            txt = ""
            length = 0
            # Manual line wrapping of the comment text.
            # NOTE(review): ASCII chars add 3 while non-ASCII add 1 —
            # looks inverted relative to display width; and the wrap
            # fires only when length lands EXACTLY on max_width, so
            # mixed-width text can step past it without wrapping.
            # Preserved as-is; confirm the intended weighting.
            for u in cmt.txt:
                txt = txt + u
                if ord(u) < 128:
                    length = length + 3
                else:
                    length = length + 1
                if length == max_width:
                    txt = txt + "\n"
                    length = 0
            liked = str(cmt.liked)
            tb.table_data.append([str(cnt), str(au), str(txt), liked])
        print(tb.table)
    except UnicodeEncodeError:
        # Terminal can't encode the table; dump one comment per block.
        pylog.log.info("获取歌曲详情编码存在问题,转为非表格形式,歌曲ID:{}".format(music_id))
        for cmt in comments:
            print("评论: {}".format(tools.encode(cmt.txt)))
            print(
                "作者: {} 点赞: {}".format(
                    tools.encode(cmt.author), str(cmt.liked)
                )
            )
            print("")
    except Exception as e:
        pylog.print_warn("获取歌曲时出现异常: {} 歌曲ID:{}".format(e, music_id))
def get_music():
    """Web handler: crawl pending playlists for the genre given in
    form field ``gdSource`` and return the crawled songs as JSON."""
    crawler = music.Music()
    source = tools.encode(request.form["gdSource"])
    data = crawler.views_capture(source)
    return jsonify({"type": request.form["gdSource"], "data": data})
def get_playlist():
    """Web handler: scrape one catalogue page (form field ``gdPage``)
    of the genre in ``gdType`` and return the stored titles as JSON."""
    scraper = playlist.Playlist()
    page = int(request.form['gdPage'])
    genre = tools.encode(request.form["gdType"])
    title = scraper.view_capture(page, genre)
    return jsonify({"type": request.form["gdType"], "title": title})
def view_capture(self, link):
    """Crawl one playlist and persist its tracks.

    For official top-lists (playlist ids present in ``uapi.top``) each
    track's chart entry (position, last rank, chart timestamps) is also
    recorded in toplist163, carrying over the 'mailed' flag of songs
    that were already mailed. Marks the playlist row 'done'='Y' on
    success or 'E' on failure (the exception is logged and swallowed,
    matching the original control flow).

    :param link: playlist id as stored in Playlist163.link.
    :returns: list of {"name", "author"} dicts for newly stored songs,
        or None on failure.
    """
    url = self.__url + str(link)
    songs = []
    try:
        data = self.curl_playlist(link)
        musics = data['tracks']
        exist = 0
        for music in musics:
            name = tools.encode(music['name'])
            authors = []
            for art in music['artists']:
                authors.append(tools.encode(art['name']))
            # Some tracks carry no bMusic record; fall back to 0.
            if music["bMusic"] is None:
                play_time = 0
            else:
                play_time = music["bMusic"]["playTime"]
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(
                    pysql.Music163(song_id=music['id'],
                                   song_name=name,
                                   author=",".join(authors),
                                   playTime=play_time))
                self.session.commit()
                exist = exist + 1
                songs.append({"name": name, "author": ",".join(authors)})
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name,
                                                   "取消持久化"))
            # Handle official top-lists: record this track's chart entry.
            if int(link) in uapi.top.keys():
                updateTime = datetime.datetime.fromtimestamp(
                    data['updateTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                createTime = datetime.datetime.fromtimestamp(
                    data['createTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                position = music['position']
                # Tracks new to the chart have no lastRank field.
                lastrank = 100000000
                with tools.ignored(Exception):
                    lastrank = music['lastRank']
                cnt = self.session.query(pysql.Toplist163).filter(
                    pysql.Toplist163.update_time == updateTime,
                    pysql.Toplist163.song_id == music['id'],
                    pysql.Toplist163.playlist_id == link).count()
                mcnt = self.session.query(pysql.Toplist163).filter(
                    pysql.Toplist163.mailed == "Y",
                    pysql.Toplist163.song_id == music['id'],
                    pysql.Toplist163.playlist_id == link).count()
                if cnt == 0:
                    # Keep 'Y' if any earlier chart entry was mailed.
                    mailed = "N"
                    if mcnt > 0:
                        mailed = "Y"
                    self.session.add(
                        pysql.Toplist163(song_id=music['id'],
                                         song_name=name,
                                         author=",".join(authors),
                                         playTime=play_time,
                                         position=position,
                                         playlist_id=link,
                                         lastRank=lastrank,
                                         mailed=mailed,
                                         create_time=createTime,
                                         update_time=updateTime))
                    self.session.commit()
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
            len(musics), exist))
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({
                'done': 'Y',
                # BUG FIX: format was "%Y-%m-%d %H:%S:%M"
                # (hour:second:minute) — stored garbled timestamps.
                'update_time':
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })
        self.session.commit()
        return songs
    except Exception as e:
        pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
        # BUG FIX: filter on `link` (the stored key) instead of the full
        # URL — the old `link == url` comparison never matched any row,
        # so the 'E' state was never persisted.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({
                'done': 'E',
                'update_time':
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })
        self.session.commit()