Example #1
0
    def search(self, album, artists):
        """Look up an album, preferring a stored album ID over a text search.

        Args:
            album: Album title used for the text search.
            artists: Either a sequence of artist names, or a mapping whose
                keys are artist names and whose values are iterables of
                tracks. Only the mapping form is scanned for stored IDs.

        Returns:
            The result of ``self.keyword_search`` when ``self._useid`` is
            set, a stored album ID is found, and that lookup does not raise
            ``OldURLError``.

        Raises:
            RetrievalError: If ``album`` is empty, or the search page could
                not be retrieved.
        """
        # NOTE(review): ``ret``, ``artist`` and ``searchpage`` are assigned
        # but not used further within this block -- confirm whether the
        # remainder of the method is missing.
        ret = []
        if len(artists) > 1:
            artist = u'Various Artists'
        elif hasattr(artists, 'items'):
            # list(...) keeps this working where keys() returns a view
            # instead of a list (Python 3).
            artist = list(artists.keys())[0]
        else:
            artist = artists[0]

        if self._useid and hasattr(artists, 'values'):
            # Flatten the per-artist track groups into one list.
            tracks = []
            for group in artists.values():
                tracks.extend(group)

            # Fields are tried in priority order; the first truthy ID wins.
            for field in ('amg_rovi_id', 'amg_pop_id', 'amgsqlid',
                          'amg_album_id'):
                album_id = find_id(tracks, field)
                if album_id:
                    break

            if not isempty(album_id):
                write_log(u'Found Album ID %s' % album_id)
                try:
                    return self.keyword_search(u':id %s' % album_id)
                except OldURLError:
                    # Fall through to the normal title search below.
                    write_log("Invalid URL used. Doing normal search.")

        if not album:
            raise RetrievalError('Album name required.')

        write_log(u'Searching for %s' % album)
        try:
            searchpage = search(album)
        except urllib2.URLError as e:
            # ``except X as e`` is accepted by Python 2.6+ and Python 3,
            # whereas ``except X, e`` is a syntax error on Python 3.
            write_log(u'Error: While retrieving search page %s' %
                      unicode(e))
            raise RetrievalError(unicode(e))
Example #2
0
    def search(self, album, artists):
        """Search for ``album`` and return the candidate matches.

        When ``self._useid`` is set and ``artists`` is a mapping, the tracks
        in its values are scanned for a stored album ID; if one is found the
        result of ``self.keyword_search`` is returned directly. Otherwise a
        title search is performed.

        Args:
            album: Album title to search for.
            artists: Sequence of artist names, or a mapping of artist name
                to an iterable of tracks.

        Returns:
            The ``keyword_search`` result for an ID lookup, or a list of
            ``(match, [])`` tuples from the title search (empty when the
            search page yields nothing).

        Raises:
            RetrievalError: If ``album`` is empty or the search page cannot
                be retrieved.
        """
        # More than one artist is treated as a compilation.
        if len(artists) > 1:
            artist = 'Various Artists'
        elif hasattr(artists, 'items'):
            artist = list(artists.keys())[0]
        else:
            artist = artists[0]

        if self._useid and hasattr(artists, 'values'):
            tracks = [track for group in artists.values() for track in group]

            # Probe the ID fields in priority order; stop at the first hit.
            id_fields = ('amg_rovi_id', 'amg_pop_id', 'amgsqlid',
                         'amg_album_id')
            for id_field in id_fields:
                album_id = find_id(tracks, id_field)
                if album_id:
                    break

            if not isempty(album_id):
                write_log('Found Album ID %s' % album_id)
                try:
                    return self.keyword_search(':id %s' % album_id)
                except OldURLError:
                    # The stored ID is stale; continue with a title search.
                    write_log("Invalid URL used. Doing normal search.")

        if not album:
            raise RetrievalError('Album name required.')

        write_log('Searching for %s' % album)
        try:
            searchpage = search(album)
        except urllib.error.URLError as e:
            reason = str(e)
            write_log('Error: While retrieving search page %s' % reason)
            raise RetrievalError(reason)
        write_log('Retrieved search results.')

        search_results = parse_searchpage(searchpage, artist, album)
        if not search_results:
            return []
        matched, matches = search_results

        # A single exact match is returned on its own, without logging.
        if matched and len(matches) == 1:
            return [(matches[0], [])]

        if matched:
            outcome = 'Ambiguous matches found for: %s - %s'
        else:
            outcome = 'No exact matches found for: %s - %s'
        write_log(outcome % (artist, album))
        return [(match, []) for match in matches]
Example #3
0
def parse_search_element(td, id_field=ALBUM_ID):
    """Build an album-info dictionary from one search-result element.

    All album details of a search hit live in ``div`` children of ``td``
    (classes ``title``, ``artist``, ``year`` and ``genres``).

    The following keys are collected; any whose value ``isempty`` rejects
    is left out of the result:
    artist -- artist name found
    album -- album name found
    #albumurl -- link to the album (also stored under ``amg_url``)
    #extrainfo -- two-item list: description text and the album link
    year -- album release year
    genre -- text of the ``genres`` div
    ``id_field`` -- the ``mw<digits>`` suffix of the album link
    """

    def div(css_class):
        return td.find('div', {'class': css_class})

    def text_of(node):
        # Try the nested link's text first, then the node's own text; an
        # AttributeError at either step moves on to the next option.
        for pick in (lambda n: n.a.string, lambda n: n.string):
            try:
                return convert(pick(node))
            except AttributeError:
                pass
        return ''

    title_node = div('title')

    info = {'album': text_of(title_node)}
    info['#albumurl'] = convert(title_node.a.element.attrib['href'])
    info['amg_url'] = info['#albumurl']
    info['artist'] = text_of(div('artist'))

    if not info['artist']:
        # No artist div: the title may read "Artist: Album".
        combined = text_of(div('title'))
        if ':' in combined:
            name, _, title = combined.partition(':')
            info['artist'], info['album'] = name.strip(), title.strip()
        else:
            info['album'] = combined

    info['year'] = text_of(div('year'))
    info['genre'] = text_of(div('genres'))
    info['#extrainfo'] = [
        info['album'] + ' at AllMusic.com',
        info['#albumurl'],
    ]

    # The album ID is the trailing "-mw<digits>" part of the URL.
    info[id_field] = re.search(r'-(mw\d+)$', info['#albumurl']).group(1)

    return {key: value for key, value in info.items() if not isempty(value)}
Example #4
0
def capture_dict_values(d):
    """Return a copy of ``d`` restricted to simple, normalised values.

    An entry is skipped when its key is in ``RELEASE_PROPERTIES_TO_IGNORE``,
    when ``isempty`` reports its value as empty, or when the value is a
    ``dict``. Of the remaining values, ``bytes`` are decoded as UTF-8,
    numbers are converted with ``str``, and everything else is kept as is.
    """

    def normalise(value):
        if isinstance(value, bytes):
            return value.decode('utf8')
        if isinstance(value, numbers.Number):
            return str(value)
        return value

    captured = {}
    for name, value in d.items():
        skip = (name in RELEASE_PROPERTIES_TO_IGNORE
                or isempty(value)
                or isinstance(value, dict))
        if not skip:
            captured[name] = normalise(value)
    return captured
Example #5
0
def parse_search_element(td, id_field=ALBUM_ID):
    """Parse search element td and return a dictionary with album info.

    Search pages contain all album info in a td element. This routine
    parses the element and returns the info in a dictionary with
    the field as keys and value being the value.

    The following keys are collected; entries whose value ``isempty``
    rejects are removed before returning:
    artist -- artist name found
    album -- album name found
    #albumurl -- link to album (also stored under ``amg_url``).
    #extrainfo -- two-item list with first item description text and
                  second item a link to the album.
    year -- album release year.
    genre -- text of the ``genres`` div.
    ``id_field`` -- the ``mw<digits>`` suffix of the album link."""

    def to_string(e):
        # Prefer the text of a nested link, then the element's own text;
        # fall back to an empty string if neither attribute chain exists.
        try:
            return convert(e.a.string)
        except AttributeError:
            try:
                return convert(e.string)
            except AttributeError:
                return u''

    info = {}

    album = td.find('div', {'class': 'title'})

    info['album'] = to_string(album)
    info['#albumurl'] = convert(album.a.element.attrib['href'])
    info['amg_url'] = info['#albumurl']

    info['artist'] = to_string(td.find('div', {'class': 'artist'}))

    if not info['artist']:
        # No artist div: the title may read "Artist: Album".
        artist = to_string(td.find('div', {'class': 'title'}))
        if u':' in artist:
            artist = [z.strip() for z in artist.split(u':', 1)]
            info['artist'], info['album'] = artist
        else:
            info['album'] = artist

    info['year'] = to_string(td.find('div', {'class': 'year'}))
    info['genre'] = to_string(td.find('div', {'class': 'genres'}))

    info['#extrainfo'] = [
        info['album'] + u' at AllMusic.com', info['#albumurl']]

    # Raw string so that ``\d`` reaches the regex engine untouched.
    info[id_field] = re.search(r'-(mw\d+)$', info['#albumurl']).groups()[0]

    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` is
    # Python 2 only.
    return dict((k, v) for k, v in info.items() if not isempty(v))
Example #6
0
def check_values(d):
    """Return a cleaned copy of ``d`` with text values normalised.

    An entry is dropped when its key is in ``INVALID_KEYS``, when
    ``isempty`` reports its value as empty, or when the value looks like a
    mapping (it has both ``__iter__`` and ``items``). Byte strings are
    decoded as UTF-8, other values without ``__iter__`` are converted with
    ``unicode``, and remaining iterables are kept unchanged.
    """
    ret = {}
    for key, v in d.iteritems():
        if key in INVALID_KEYS or isempty(v):
            continue
        if hasattr(v, '__iter__') and hasattr(v, 'items'):
            continue

        if isinstance(v, str):
            # Must come before the ``__iter__`` test: Python 2 byte strings
            # have no ``__iter__``, so they would otherwise go through
            # ``unicode(v)``, which fails on non-ASCII UTF-8 bytes.
            v = v.decode('utf8')
        elif not hasattr(v, '__iter__'):
            v = unicode(v)

        ret[key] = v

    return ret
Example #7
0
def check_values(d):
    """Return a cleaned copy of ``d`` with text values normalised.

    An entry is dropped when its key is in ``INVALID_KEYS``, when
    ``isempty`` reports its value as empty, or when the value looks like a
    mapping (it has both ``__iter__`` and ``items``). Byte strings are
    decoded as UTF-8, other values without ``__iter__`` are converted with
    ``unicode``, and remaining iterables are kept unchanged.
    """
    ret = {}
    for key, v in d.iteritems():
        if key in INVALID_KEYS or isempty(v):
            continue
        if hasattr(v, '__iter__') and hasattr(v, 'items'):
            continue

        if isinstance(v, str):
            # Must come before the ``__iter__`` test: Python 2 byte strings
            # have no ``__iter__``, so they would otherwise go through
            # ``unicode(v)``, which fails on non-ASCII UTF-8 bytes.
            v = v.decode('utf8')
        elif not hasattr(v, '__iter__'):
            v = unicode(v)

        ret[key] = v

    return ret
Example #8
0
def parse_albumpage(page, artist=None, album=None):
    """Parse an album page into ``[info, tracks]``.

    Args:
        page: Page source handed to ``parse_html.parse``.
        artist: Not read; rebound to the artist element found in the page.
        album: Not read; rebound to the title element found in the page.

    Returns:
        A two-item list: the album info dictionary (keys renamed through
        ``spanmap``, entries rejected by ``isempty`` removed) and the
        result of ``parse_tracks(album_soup, info)``.
    """
    info = {}

    album_soup = parse_html.SoupWrapper(parse_html.parse(page))

    artist = album_soup.find('div', {'class': 'album-artist'})
    album = album_soup.find('div', {'class': 'album-title'})

    release_title = album_soup.find('h3', 'release-title')

    if release_title:
        # Release pages carry their own title and an optional details line.
        album = release_title
        details = album_soup.find('p', {'class': 'release-details'})
        if details:
            info['release'] = convert(details.string)

    if not artist:
        artist = album_soup.find('h3', 'release-artist')

    # NOTE(review): if neither artist lookup finds an element, the
    # ``artist.string`` access below raises AttributeError -- confirm
    # whether such pages can occur.
    info['artist'] = convert(artist.string)
    info['album'] = '' if album is None else convert(album.string)
    info['albumartist'] = info['artist']

    sidebar = album_soup.find('div', {'class': 'sidebar'})
    info.update(parse_sidebar(sidebar))
    info.update(convert_year(info))

    content = album_soup.find('section', {'class': 'review read-more'})
    if content:
        info.update(parse_review(content))

    #swipe = main.find('div', {'id':"similar-albums", 'class':"grid-gallery"})

    #info.update(parse_similar(swipe))

    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` is
    # Python 2 only.
    info = dict(
        (spanmap.get(k, k), v) for k, v in info.items() if not isempty(v))

    return [info, parse_tracks(album_soup, info)]
Example #9
0
def parse_albumpage(page, artist=None, album=None):
    """Parse an album page into ``[info, tracks]``.

    Args:
        page: Page source handed to ``parse_html.parse``.
        artist: Not read; rebound to the artist element found in the page.
        album: Not read; rebound to the title element found in the page.

    Returns:
        A two-item list: the album info dictionary (keys renamed through
        ``spanmap``, entries rejected by ``isempty`` removed) and the
        result of ``parse_tracks(album_soup, info)``.
    """
    info = {}

    album_soup = parse_html.SoupWrapper(parse_html.parse(page))

    artist = album_soup.find('div', {'class': 'album-artist'})
    album = album_soup.find('div', {'class': 'album-title'})

    release_title = album_soup.find('h3', 'release-title')

    if release_title:
        # Release pages carry their own title and an optional details line.
        album = release_title
        details = album_soup.find('p', {'class': 'release-details'})
        if details:
            info['release'] = convert(details.string)

    if not artist:
        artist = album_soup.find('h3', 'release-artist')

    # NOTE(review): if neither artist lookup finds an element, the
    # ``artist.string`` access below raises AttributeError -- confirm
    # whether such pages can occur.
    info['artist'] = convert(artist.string)
    info['album'] = '' if album is None else convert(album.string)
    info['albumartist'] = info['artist']

    sidebar = album_soup.find('div', {'class': 'sidebar'})
    info.update(parse_sidebar(sidebar))
    info.update(convert_year(info))

    content = album_soup.find('section', {'class': 'review read-more'})
    if content:
        info.update(parse_review(content))

    #swipe = main.find('div', {'id':"similar-albums", 'class':"grid-gallery"})

    #info.update(parse_similar(swipe))

    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` is
    # Python 2 only.
    info = dict(
        (spanmap.get(k, k), v) for k, v in info.items() if not isempty(v))

    return [info, parse_tracks(album_soup, info)]
Example #10
0
    def search(self, album, artists):
        """Look up an album, preferring a stored album ID over a text search.

        Args:
            album: Album title used for the text search.
            artists: Either a sequence of artist names, or a mapping whose
                keys are artist names and whose values are iterables of
                tracks. Only the mapping form is scanned for stored IDs.

        Returns:
            The result of ``self.keyword_search`` when ``self._useid`` is
            set, a stored album ID is found, and that lookup does not raise
            ``OldURLError``.

        Raises:
            RetrievalError: If ``album`` is empty, or the search page could
                not be retrieved.
        """
        # NOTE(review): ``ret``, ``artist`` and ``searchpage`` are assigned
        # but not used further within this block -- confirm whether the
        # remainder of the method is missing.
        ret = []
        if len(artists) > 1:
            artist = u'Various Artists'
        elif hasattr(artists, 'items'):
            # list(...) keeps this working where keys() returns a view
            # instead of a list (Python 3).
            artist = list(artists.keys())[0]
        else:
            artist = artists[0]

        if self._useid and hasattr(artists, 'values'):
            # Flatten the per-artist track groups into one list.
            tracks = []
            for group in artists.values():
                tracks.extend(group)

            # Fields are tried in priority order; the first truthy ID wins.
            for field in (
                    'amg_rovi_id',
                    'amg_pop_id',
                    'amgsqlid',
                    'amg_album_id',
            ):
                album_id = find_id(tracks, field)
                if album_id:
                    break

            if not isempty(album_id):
                write_log(u'Found Album ID %s' % album_id)
                try:
                    return self.keyword_search(u':id %s' % album_id)
                except OldURLError:
                    # Fall through to the normal title search below.
                    write_log("Invalid URL used. Doing normal search.")

        if not album:
            raise RetrievalError('Album name required.')

        write_log(u'Searching for %s' % album)
        try:
            searchpage = search(album)
        except urllib2.URLError as e:
            # ``except X as e`` is accepted by Python 2.6+ and Python 3,
            # whereas ``except X, e`` is a syntax error on Python 3.
            write_log(u'Error: While retrieving search page %s' % unicode(e))
            raise RetrievalError(unicode(e))
Example #11
0
def parse_track(tr, fields, performance_title=None):
    """Parse one track-listing row into a dictionary of tag fields.

    Args:
        tr: Row element. A row whose ``class`` attribute is
            ``'perfomance-title'`` is a heading rather than a track.
        fields: Field names paired positionally with the row's ``td``
            cells. ``None`` means "use the cell's own ``class`` attribute".
        performance_title: Optional heading that is prefixed to the track
            title as ``"<heading>: <title>"``.

    Returns:
        For a heading row, the converted row text. Otherwise a dictionary
        of the collected fields, renamed through ``spanmap``; entries whose
        mapped name is falsy or whose value ``isempty`` rejects are dropped.
    """
    track = {}
    ignore = set(['pick-prefix', 'sample', 'stream', 'pick-suffix'])

    if tr.element.attrib.get('class') == 'perfomance-title':
        return convert(tr.string)

    for td, field in zip(tr.find_all('td'), fields):
        if field in ignore:
            continue
        if field is None:
            field = td.element.attrib.get('class')
            if not field:
                continue

        sub_fields = td.find_all('div')
        if not sub_fields:
            # Plain cell: its text is the field value.
            track[field] = convert(td.string)
            continue

        # Cell split into divs: each div's class names its own field.
        for div in sub_fields:
            sub_field = div.element.attrib['class']
            value = convert(div.string)
            if field == 'performer':
                if sub_field == 'primary':
                    sub_field = field
                elif sub_field == 'featuring':
                    # Append the featured artist to the performer text; it
                    # is also stored under 'featuring' below.
                    track[field] = u'%s %s' % (track.get(field, u''), value)
                else:
                    sub_field = 'composer'
            track[sub_field] = value

    if performance_title and 'title' in track:
        track['title'] = performance_title + u': ' + track['title']
    if 'artist' not in track and 'performer' in track:
        track['artist'] = track['performer']

    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` is
    # Python 2 only.
    return dict((spanmap.get(k, k), v) for k, v in track.items()
                if spanmap.get(k, k) and not isempty(v))
Example #12
0
def parse_track(tr, fields, performance_title=None):
    """Parse one track-listing row into a dictionary of tag fields.

    Args:
        tr: Row element. A row whose ``class`` attribute is
            ``'perfomance-title'`` is a heading rather than a track.
        fields: Field names paired positionally with the row's ``td``
            cells. ``None`` means "use the cell's own ``class`` attribute".
        performance_title: Optional heading that is prefixed to the track
            title as ``"<heading>: <title>"``.

    Returns:
        For a heading row, the converted row text. Otherwise a dictionary
        of the collected fields, renamed through ``spanmap``; entries whose
        mapped name is falsy or whose value ``isempty`` rejects are dropped.
    """
    track = {}
    ignore = set(['pick-prefix', 'sample', 'stream', 'pick-suffix'])

    if tr.element.attrib.get('class') == 'perfomance-title':
        return convert(tr.string)

    for td, field in zip(tr.find_all('td'), fields):
        if field in ignore:
            continue
        if field is None:
            field = td.element.attrib.get('class')
            if not field:
                continue

        sub_fields = td.find_all('div')
        if not sub_fields:
            # Plain cell: its text is the field value.
            track[field] = convert(td.string)
            continue

        # Cell split into divs: each div's class names its own field.
        for div in sub_fields:
            sub_field = div.element.attrib['class']
            value = convert(div.string)
            if field == 'performer':
                if sub_field == 'primary':
                    sub_field = field
                elif sub_field == 'featuring':
                    # Append the featured artist to the performer text; it
                    # is also stored under 'featuring' below.
                    track[field] = u'%s %s' % (track.get(field, u''), value)
                else:
                    sub_field = 'composer'
            track[sub_field] = value

    if performance_title and 'title' in track:
        track['title'] = performance_title + u': ' + track['title']
    if 'artist' not in track and 'performer' in track:
        track['artist'] = track['performer']

    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` is
    # Python 2 only.
    return dict((spanmap.get(k, k), v) for k, v in track.items()
                if spanmap.get(k, k) and not isempty(v))