Example #1
0
def parse_cdbaby_album(album_id):
    """Scrape the CD Baby page for ``album_id`` and return a release dict.

    The page is fetched from ``http://www.cdbaby.com/cd/<album_id>`` and the
    fields are read from elements looked up by their fixed ids.

    The returned dict has the keys ``cdbaby_id``, ``title``, ``artist``,
    ``artist_cdbaby_id``, ``barcode``, ``date`` and ``mediums``; ``label`` is
    only present when the page names a record label that differs from the
    artist (compared case-insensitively).  ``mediums`` holds a single medium
    with ``position`` 1, its ``tracks`` (each with ``position``, ``title`` and
    ``length``) and a ``format``.

    No lookup other than barcode and record label is guarded, so a page
    missing any other expected element makes this function raise.
    """
    url = 'http://www.cdbaby.com/cd/%s' % (album_id,)
    page = fetch_page(url, html=True)

    def span(element_id):
        # Most fields live in <span> elements addressed by id.
        return page.find('span', {'id': element_id})

    title = guess_case_title(span('ctl00_rightColumn_lblAlbumName').text)
    cdbaby_id = extract_album_id(url)

    artists_panel = page.find('div', {'id': 'ctl00_rightColumn_pnlArtists'})
    artist = guess_case(artists_panel.find('span').text)
    artist_link = page.find('a', {'id': 'ctl00_breadCrumb_lnkArtist'})
    artist_cdbaby_id = extract_artist_id(artist_link['href'])

    # The barcode element is optional; when absent the lookup result itself
    # is stored under 'barcode'.
    barcode_span = span('ctl00_rightColumn_lblBarcode')
    barcode = extract_barcode(barcode_span.text) if barcode_span else barcode_span

    # int() makes a non-numeric release value fail here rather than later.
    release_year = int(span('ctl00_leftColumn_lblAlbumRelease').text)

    # The label is optional too, and is ignored when it merely repeats the
    # artist name.
    label = None
    label_span = span('ctl00_rightColumn_lblRecordLabel')
    if label_span:
        stripped_label = re.sub(r'^Record Label: ', '', label_span.text)
        if artist.lower() != stripped_label.lower():
            label = stripped_label

    release = {
        'cdbaby_id': cdbaby_id,
        'title': title,
        'artist': artist,
        'artist_cdbaby_id': artist_cdbaby_id,
        'barcode': barcode,
        'date': str(release_year),
    }
    if label:
        release['label'] = label

    tracks = []
    medium = {'position': 1, 'tracks': tracks}
    release['mediums'] = [medium]

    track_table = page.find('table', {'id': 'tracks-display'})
    for row in track_table.findAll('tr'):
        cells = row.findAll('td')
        # Only rows with exactly four cells are treated as tracks.
        if len(cells) == 4:
            position, track_title = parse_track_title(cells[1].text)
            length = parse_track_length(cells[2].find('span').text)
            tracks.append({
                'position': position,
                'title': track_title,
                'length': length,
            })

    # A buy-now button on the page marks the release as a CD; otherwise it is
    # recorded as digital media.
    has_buy_button = page.find('input', {'class': 'cd-buynow-button'})
    medium['format'] = 'CD' if has_buy_button else 'Digital Media'

    return release
Example #2
0
def parse_track_title(s):
    """Split a track entry of the form ``"<number>. <title>"``.

    Returns a ``(track_number, title)`` tuple where ``track_number`` is an
    ``int`` and ``title`` is the remaining text passed through
    ``guess_case_title``.

    Raises ``ValueError`` when ``s`` does not start with digits followed by
    a literal ``". "`` and at least one more character.
    """
    # The dot is escaped on purpose: an unescaped `.` matches any character,
    # which lets the regex backtrack and swallow a digit as the separator
    # (e.g. "10 Foo" would be parsed as track 1 with title "Foo").
    m = re.match(r'^(\d+)\. (.+)$', s)
    if m is None:
        # Fail with a message naming the offending text instead of an
        # AttributeError on None.
        raise ValueError('unrecognised track title: %r' % (s,))
    track_no, track_title = m.groups()
    return int(track_no), guess_case_title(track_title)