Example #1
import requests
from bs4 import BeautifulSoup

# `sql` is the project's own database helper module (it provides insert_artist).
import sql


def save_artist(group_id, initial):
    params = {'id': group_id, 'initial': initial}
    r = requests.get('http://music.163.com/discover/artist/cat', params=params)

    # Parse the returned HTML.
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    body = soup.body

    hot_artists = body.find_all('a', attrs={'class': 'msk'})
    artists = body.find_all('a', attrs={'class': 'nm nm-icn f-thide s-fc0'})

    # Featured ("hot") artists first, then the rest of the listing.
    for artist in hot_artists:
        # The href has the form "/artist?id=<id>"; keep only the id.
        artist_id = artist['href'].replace('/artist?id=', '').strip()
        # The title has the form "<name>的音乐" ("<name>'s music"); keep only the name.
        artist_name = artist['title'].replace('的音乐', '')
        try:
            sql.insert_artist(artist_id, artist_name)
        except Exception as e:
            # Log the error and continue with the next artist.
            print(e)

    for artist in artists:
        artist_id = artist['href'].replace('/artist?id=', '').strip()
        artist_name = artist['title'].replace('的音乐', '')
        try:
            sql.insert_artist(artist_id, artist_name)
        except Exception as e:
            # Log the error and continue with the next artist.
            print(e)
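
A minimal driver for this helper might look like the sketch below. The category ids and initial values are illustrative guesses at what the music.163.com artist catalogue accepts, not an exhaustive list, and `sql` is still the project-specific module assumed above.

if __name__ == '__main__':
    group_ids = [1001, 1002]              # e.g. two singer categories (assumed ids)
    initials = [0] + list(range(65, 91))  # 0 = "other", 65-90 = letters A-Z
    for gid in group_ids:
        for initial in initials:
            save_artist(gid, initial)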
Example #2
import requests
from bs4 import BeautifulSoup

# `sql` is the project's own database helper module (it provides insert_artist).
import sql

# Module-level state assumed by this example: request headers for the
# NetEase Cloud Music site and a set that collects the category ids found so far.
headers = {'User-Agent': 'Mozilla/5.0'}
labelList = set()


def save_artist(group_id, initial):
    params = {'id': group_id, 'initial': initial}
    print("== crawl parameters ==", params)
    r = requests.get('https://music.163.com/discover/artist/cat',
                     headers=headers,
                     params=params)
    # Parse the returned HTML.
    soup = BeautifulSoup(r.content.decode(), 'html.parser')
    # print(soup)
    labels = soup.find_all('a', attrs={'class': 'cat-flag'})
    hot_artists = soup.find_all('a', attrs={'class': 'msk'})
    artists = soup.find_all('a', attrs={'class': 'nm nm-icn f-thide s-fc0'})

    # With no category id given, harvest every category id exposed on the page.
    if not group_id:
        for label in labels:
            try:
                group_id = label['data-cat']
                if group_id:
                    labelList.add(group_id)
            except KeyError as e:
                # This <a> tag carries no data-cat attribute.
                print(e)
        print(labelList)

    # Featured ("hot") artists first, then the rest of the listing.
    for artist in hot_artists:
        # The href has the form "/artist?id=<id>"; keep only the id.
        artist_id = artist['href'].replace('/artist?id=', '').strip()
        # The title has the form "<name>的音乐" ("<name>'s music"); keep only the name.
        artist_name = artist['title'].replace('的音乐', '')
        try:
            sql.insert_artist(artist_id, artist_name)
        except Exception as e:
            # Log the error and continue with the next artist.
            print(e)

    for artist in artists:
        artist_id = artist['href'].replace('/artist?id=', '').strip()
        artist_name = artist['title'].replace('的音乐', '')
        try:
            sql.insert_artist(artist_id, artist_name)
        except Exception as e:
            # Log the error and continue with the next artist.
            print(e)
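
Because this variant stores the discovered category ids in labelList whenever it is called without a group_id, one bootstrap request can seed a full crawl. The sketch below shows that two-phase use under the same assumptions as above (module-level headers, labelList, and sql); the initial values are again illustrative.

import time

if __name__ == '__main__':
    # Phase 1: an empty group_id makes save_artist fill labelList with category ids.
    save_artist('', 0)
    # Phase 2: crawl every discovered category, letter by letter, with a polite pause.
    for gid in sorted(labelList):
        for initial in [0] + list(range(65, 91)):
            save_artist(gid, initial)
            time.sleep(1)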