Beispiel #1
0
def parse_genre(url):
    """Fetch a genre page and extract its names and introduction.

    The heading text of the ``h3.bigtext`` element is passed through
    ``left_partition`` and unpacked into ``name`` and ``engname``. The
    introduction is built from the children of the ``div`` with class
    ``content fold``: children whose ``name`` is None contribute their
    stripped text, every other child contributes a ``<br/>`` marker, and
    the pieces are joined with newlines.

    Returns:
        A ``(name, engname, intro)`` tuple.
    """
    page = fetch_soup(url)
    heading = phyhtml.tag_of_class(page, 'h3', 'bigtext')
    name, engname = left_partition(heading.text.strip())
    content = phyhtml.tag_of_class(page, 'div', 'content fold')
    intro = '\n'.join(
        node.strip() if node.name is None else '<br/>'
        for node in content
    )
    return name, engname, intro
Beispiel #2
0
def search_artist(key, func, page=None, size=30, filter=None):
    """Search for *key* and hand the results to ``fetch_artists``.

    When ``page`` is given, only that result page is processed and the
    value of ``fetch_artists`` is returned. Otherwise the unpaged search
    page is read to obtain the total hit count, and the function calls
    itself once per result page (``size`` hits per page), returning None.

    NOTE(review): the URLs come from ``SEARCH_SONG_URL`` /
    ``SEARCH_SONG_URL_PAGED`` even though this searches artists -- confirm
    this is intentional.
    """
    if page is None:
        soup = fetch_soup(SEARCH_SONG_URL.format(key))
        counter = phyhtml.tag_of_class(soup, 'p', 'seek_counts ok')
        total = int(counter.next.next.text)
        page_count = int(math.ceil(total / size))
        for number in range(1, page_count + 1):
            search_artist(key, func, number, size, filter)
        return None
    paged_url = SEARCH_SONG_URL_PAGED.format(page, key)
    return fetch_artists(paged_url, func, filter)
Beispiel #3
0
def search_album(key, func, page=None, size=30, filter=None):
    """Search albums matching *key* and pass each one through ``fetch_albums``.

    When ``page`` is given, only that result page is fetched and processed.
    Otherwise the unpaged search page is read to obtain the total hit
    count, and the function calls itself once per result page.

    Args:
        key: Search term substituted into the search URL templates.
        func: Callback forwarded to ``fetch_albums``.
        page: Result page number to process, or None to process all pages.
        size: Number of hits assumed per result page when computing the
            page count.
        filter: Optional predicate forwarded to ``fetch_albums``.

    Returns:
        The result of ``fetch_albums`` when ``page`` is given, else None.
    """
    if page is not None:
        print("fetch album in page:", page)
        # Stop here. Falling through would re-read the hit count and
        # recurse into every page again, so the recursion never ends.
        return fetch_albums(SEARCH_ALBUM_URL_PAGED.format(page, key), func, filter)
    soup = fetch_soup(SEARCH_ALBUM_URL.format(key))
    total = int(phyhtml.tag_of_class(soup, 'p', 'seek_counts').next.next.text)
    count = int(math.ceil(total / size))
    print("found", total, 'albums in', count, 'pages')
    for number in range(1, count + 1):
        search_album(key, func, number, size, filter)
Beispiel #4
0
def parse_artist(url):
    """Fetch an artist page and extract its details.

    The ``h1`` children supply the name (children whose ``name`` is None)
    and the alias (any other child, with surrounding quote characters
    stripped). The ``td`` cells inside ``div#artist_info`` are read as
    alternating label / value cells. When a profile link is found, that
    page is fetched as well and its content becomes the introduction.

    Returns:
        A ``(name, alias, cover, location, genres, intro)`` tuple.
        ``genres`` is a list of ``(left_partition(text), href)`` pairs and
        ``intro`` is None when no profile link was found.
    """
    page = fetch_soup(url)

    name = alias = None
    for node in page.find('h1'):
        if node.name is not None:
            alias = node.text.strip().strip('“”"')
        else:
            name = node.strip()

    location = profile = None
    genres = []
    pending = None  # label read from the previous cell, awaiting its value
    for cell in page.find('div', dict(id='artist_info')).find_all('td'):
        if pending is None:
            pending = cell.text.strip()
            continue
        if pending == '地区:':  # region
            location = right_partition(cell.text.strip())
        elif pending == '风格:':  # genre / style
            genres.extend(
                (left_partition(link.text.strip()), escape_href(link['href'], page))
                for link in cell.find_all('a')
            )
        elif pending == '档案:':  # profile
            more = phyhtml.tag_of_class(cell, 'a', 'more')
            if more:
                profile = escape_href(more['href'], page)
        pending = None

    cover = page.find('a', {'id': 'cover_lightbox'})['href']
    if not profile:
        return name, alias, cover, location, genres, None

    profile_page = fetch_soup(profile)
    block = phyhtml.tag_of_class(profile_page, 'div', 'profile')
    if block:
        parts = [str(paragraph) for paragraph in block.find_all('p')]
    else:
        # No div.profile: fall back to the raw children of div#artist-record,
        # keeping stripped text and line breaks only.
        parts = []
        for node in profile_page.find('div', {'id': 'artist-record'}):
            if node.name is None:
                parts.append(node.strip())
            elif node.name == 'br':
                parts.append('<br/>')
    return name, alias, cover, location, genres, ''.join(parts)
Beispiel #5
0
def parse_song(url):
    """Fetch a song page and extract its metadata and lyric.

    The ``h1`` children supply the title (children whose ``name`` is None)
    and the alias (any other child, with surrounding quote characters
    stripped). The ``td`` cells of the ``album_relation`` block inside
    ``div#main`` are read as alternating label / value cells. The lyric is
    assembled from the children of the ``lrc_main`` block: stripped text
    for children whose ``name`` is None, ``<br/>`` for everything else.

    Returns:
        A ``(title, alias, album, artists, lyricist, composer, arranger,
        lyric)`` tuple. ``album`` is a ``(text, href_or_None)`` pair or
        None, and ``artists`` is a list of ``(name, href)`` pairs.
    """
    soup = fetch_soup(url)

    title = alias = None
    for node in soup.find('h1'):
        if node.name is not None:
            alias = node.text.strip().strip('“”"')
        else:
            title = node.strip()

    main = soup.find('div', {'id': 'main'})
    album = lyricist = composer = arranger = None
    artists = []
    pending = None  # label read from the previous cell, awaiting its value
    relation = phyhtml.tag_of_class(main, 'div', 'album_relation')
    for cell in relation.find_all('td'):
        text = cell.text.strip()
        if pending is None:
            pending = text
            continue
        if pending == '所属专辑:':  # album the song belongs to
            link = cell.find('a')
            href = escape_href(link['href'], soup) if link else None
            album = (text, href)
        elif pending == '作词:':  # lyricist
            lyricist = text
        elif pending == '演唱者:':  # performer
            for link in cell.find_all('a'):
                singer = link.text.strip()
                if singer:
                    artists.append((singer, escape_href(link['href'], soup)))
        elif pending == '作曲:':  # composer
            composer = text
        elif pending == '编曲:':  # arranger
            arranger = text
        pending = None

    lyric = ''.join(
        node.strip() if node.name is None else '<br/>'
        for node in phyhtml.tag_of_class(main, 'div', 'lrc_main')
    )
    return title, alias, album, artists, lyricist, composer, arranger, lyric
Beispiel #6
0
 def do_filter(li):
     """Tell whether the ``a.singer`` link inside *li* names ``artist``.

     ``artist`` is taken from the enclosing scope. Returns None when no
     such link is found, otherwise the result of comparing its text with
     ``artist``.
     """
     singer = phyhtml.tag_of_class(li, 'a', 'singer')
     if not singer:
         return None
     return singer.text == artist
Beispiel #7
0
def fetch_albums(url, func, filter=None):
    """Parse every album listed on the page at *url* and pass it to *func*.

    Each ``li`` inside the ``albumBlock_list`` block is considered. When
    ``filter`` is truthy, items for which ``filter(li)`` is falsy are
    skipped. For the rest, the ``href`` of the element following the
    ``p.cover`` tag is handed to ``parse_album`` and the result to ``func``.
    """
    soup = fetch_soup(url)
    listing = phyhtml.tag_of_class(soup, 'div', 'albumBlock_list')
    for item in listing.find_all('li'):
        if filter and not filter(item):
            continue
        cover = phyhtml.tag_of_class(item, 'p', 'cover')
        album_url = cover.next['href']
        func(parse_album(album_url))