def save_music(self, album_id):
    params = {'id': album_id}
    # Fetch the page for this album
    r = requests.get('http://music.163.com/album', headers=self.headers, params=params)
    # Parse the HTML
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    body = soup.body
    album_name = body.find('div', attrs={'class': 'tit'}).find('h2').text
    # Collect every track on the album
    musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li')
    music_list = []
    for music in musics:
        music = music.find('a')
        music_id = music['href'].replace('/song?id=', '')
        music_name = music.getText()
        music_list.append((music_id, music_name))
    print("album id: %s, album name: %s" % (album_id, album_name))
    print([name for _, name in music_list])
    for music_id, music_name in music_list:
        sql.insert_music(music_id, music_name, album_id, album_name)
def save_music(self, album_id):
    # Look up the artist record for this album
    try:
        item = sql.get_artist(album_id)
    except Exception as e:
        # Log the error and bail out; 'item' would be undefined below otherwise
        print(e)
        return
    params = {'id': album_id}
    # Fetch the page for this album
    r = requests.get('http://music.163.com/album', headers=self.headers, params=params)
    # Parse the HTML
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    body = soup.body
    # Collect every track on the album
    musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li')
    for music in musics:
        music = music.find('a')
        music_id = music['href'].replace('/song?id=', '')
        music_name = music.getText()
        sql.insert_music(music_id, music_name, album_id, item[0], item[1])
def save_comments(musics, flag, connection0):
    for i in musics:
        try:
            comments = my_music.save_content(i["music_id"], flag)
            if comments["total"] > 0:
                hotList = []
                for j in comments['hotComments']:
                    hotList.append(j["content"])
                sql.insert_music(i["music_id"], comments['total'], ':'.join(hotList), connection0)
        except Exception as e:
            # Log which track failed and why
            print("error:" + str(i) + ': ' + str(e))
        # Throttle requests to avoid being rate-limited
        time.sleep(5)
def save_music(self, album_id):
    params = {'id': album_id}
    # Fetch the page for this album
    r = requests.get('http://music.163.com/album', headers=self.headers, params=params)
    r.encoding = 'utf-8'
    # Parse the HTML
    soup = BeautifulSoup(r.content, 'html.parser')
    body = soup.body
    # Every track link sits inside the hidden playlist <ul class="f-hide">
    all_music = body.find('ul', attrs={'class': 'f-hide'}).find_all('a')
    for i in all_music:
        music_id = i["href"].replace('/song?id=', '').strip()
        music_name = i.string
        print(music_id, album_id, music_name)
        sql.insert_music(music_name, album_id, music_id)
def save_music(self, artist_id):
    params = {'id': artist_id}
    # Fetch the page for this artist
    r = requests.get('http://music.163.com/artist', headers=self.headers, params=params)
    # Parse the HTML
    soup = BeautifulSoup(r.content.decode('utf-8'), 'html.parser')
    body = soup.body
    # Collect every track listed on the artist page
    musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li')
    artist_name = body.find('h2', attrs={'id': 'artist-name'}).getText()
    for music in musics:
        music = music.find('a')
        music_id = music['href'].replace('/song?id=', '')
        music_name = music.getText()
        sql.insert_music(music_id, music_name, artist_name)
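# The id extraction above relies on plain string replacement of '/song?id='.
# A more robust sketch using the standard library's URL parsing; this helper
# is purely illustrative and not part of the original crawler:
from urllib.parse import urlparse, parse_qs

def song_id_from_href(href):
    """Return the 'id' query parameter from a link such as '/song?id=123456'."""
    query = parse_qs(urlparse(href).query)
    return query.get('id', [''])[0]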
def save_music(self, album_id):
    params = {'id': album_id}
    # Fetch the page for this album, rotating between three sets of cookies
    num = random.randint(0, 2)
    if num == 0:
        r = requests.get('http://music.163.com/album', headers=self.headers_one, params=params)
    elif num == 1:
        r = requests.get('http://music.163.com/album', headers=self.headers_two, params=params)
    else:
        r = requests.get('http://music.163.com/album', headers=self.headers_there, params=params)
    # Parse the HTML
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    body = soup.body
    # Find the <ul class="f-hide"> and then its <li> entries: all tracks on the album
    musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li')
    for music in musics:
        music = music.find('a')
        music_id = music['href'].replace('/song?id=', '')
        music_name = music.getText()
        print(num)
        print(music_name)
        sql.insert_music(music_id, music_name, album_id)
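# The three-way if/elif above can be collapsed with random.choice. A minimal,
# self-contained sketch; the function name and the header_pool argument are
# illustrative assumptions, not part of the original crawler:
import random
import requests

def fetch_album(album_id, header_pool):
    """Fetch an album page using one randomly chosen header/cookie set."""
    headers = random.choice(header_pool)
    return requests.get('http://music.163.com/album',
                        headers=headers, params={'id': album_id})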
def save_music(self, album_id):
    params = {'id': album_id}
    # Fetch the page for this album through the proxy pool
    r = requests.get('http://music.163.com/album', headers=self.headers, params=params, proxies=proxies)
    # Parse the HTML
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    body = soup.body
    if body is not None:
        # Collect every track on the album
        musics = body.find('ul', attrs={'class': 'f-hide'}).find_all('li')
        for music in musics:
            music = music.find('a')
            music_id = music['href'].replace('/song?id=', '')
            music_name = music.getText()
            print(music_id, music_name, album_id)
            sql.insert_music(music_id, music_name, album_id)
    else:
        print("Album {}'s music list's body is empty...".format(album_id))
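# The sql module used by these functions is not shown here. A minimal
# sqlite3-based sketch of what an insert_music helper might look like; the
# table name, column names, and db_path default are assumptions for
# illustration only, not the original storage layer:
import sqlite3

def insert_music(music_id, music_name, album_id, db_path='music163.db'):
    """Persist one track row into a local SQLite database."""
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            'CREATE TABLE IF NOT EXISTS music (music_id TEXT, music_name TEXT, album_id TEXT)'
        )
        conn.execute(
            'INSERT INTO music (music_id, music_name, album_id) VALUES (?, ?, ?)',
            (music_id, music_name, album_id),
        )
        conn.commit()
    finally:
        conn.close()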