Example #1
0
    def search(self, artist, files=None):
        """Run a search against the configured source.

        artist -- keyword string used when no per-file override applies.
        files -- optional track list; with self.searchby set, keywords
                 are formatted from files[0] instead of *artist*.
        Returns [(info, [])] pairs from the search page, or, when no
        search source is configured, [album] / [] from a direct retrieve.
        """
        if files is None or not self.searchby:
            terms = artist
        else:
            terms = format_value(files[0], self.searchby)
        # Collapse runs of whitespace into the source's separator.
        terms = re.sub(r'\s+', self._separator, terms)

        # No parseable search source: fetch the album page directly.
        if self.search_source is None:
            result = self.retrieve(terms)
            if result:
                return [result]
            return []

        url = self._search_base.replace('%s', terms)

        write_log(translate('Mp3tag', 'Retrieving search page: %s') % url)
        set_status(translate('Mp3tag', 'Retrieving search page...'))
        # self.html lets callers inject a pre-fetched page (e.g. tests).
        raw = self.html if self.html is not None else urlopen(url)
        page = get_encoding(raw, True, 'utf8')[1]

        write_log(translate('Mp3tag', 'Parsing search page.'))
        set_status(translate('Mp3tag', 'Parsing search page...'))
        matches = parse_search_page(
            self.indexformat, page, self.search_source, url)
        return [(match, []) for match in matches]
Example #2
0
def retrieve_album(url, coverurl=None, id_field=ALBUM_ID):
    """Download and parse an AllMusic album page.

    Arguments:
    url -- album page URL to fetch.
    coverurl -- if truthy, also attempt to download the album cover.
    id_field -- not used in the visible code; presumably the info key
                for the album id -- TODO confirm against the full file.
    """
    write_log('Opening Album Page - %s' % url)
    album_page, code = urlopen(url, False, True)
    # The legacy AMG front page is served for outdated URL schemes.
    if album_page.find("featured new releases") >= 0:
        raise OldURLError("Old AMG URL used.")
    album_page = get_encoding(album_page, True, 'utf8')[1]

    info, tracks = parse_albumpage(album_page)
    info['#albumurl'] = url
    info['amg_url'] = url

    if 'album' in info:
        # Human-readable link shown in the UI.
        info['#extrainfo'] = [
            info['album'] + u' at AllMusic.com', info['#albumurl']
        ]

    if coverurl:
        try:
            write_log('Retrieving Cover - %s' % info['#cover-url'])
            cover = retrieve_cover(info['#cover-url'])
        except KeyError:
            # Parsed page had no '#cover-url' entry.
            write_log('No cover found.')
            cover = None
        except urllib2.URLError, e:  # Python 2 except syntax
            write_log(u'Error: While retrieving cover %s - %s' %
                      (info['#cover-url'], unicode(e)))
            cover = None
    # NOTE(review): no return statement is visible in this excerpt; the
    # function presumably continues (returning info/tracks/cover) past it.
Example #3
0
def retrieve_album(url, coverurl=None, id_field=ALBUM_ID):
    """Download and parse an AllMusic album page.

    Arguments:
    url -- album page URL to fetch.
    coverurl -- if truthy, also attempt to download the album cover.
    id_field -- not used in the visible code; presumably the info key
                for the album id -- TODO confirm against the full file.
    """
    write_log('Opening Album Page - %s' % url)
    album_page, code = urlopen(url, False, True)
    # The legacy AMG front page is served for outdated URL schemes.
    if album_page.find("featured new releases") >= 0:
        raise OldURLError("Old AMG URL used.")
    album_page = get_encoding(album_page, True, 'utf8')[1]

    info, tracks = parse_albumpage(album_page)
    info['#albumurl'] = url
    info['amg_url'] = url

    if 'album' in info:
        # Human-readable link shown in the UI.
        info['#extrainfo'] = [
            info['album'] + u' at AllMusic.com', info['#albumurl']]

    if coverurl:
        try:
            write_log('Retrieving Cover - %s'  % info['#cover-url'])
            cover = retrieve_cover(info['#cover-url'])
        except KeyError:
            # Parsed page had no '#cover-url' entry.
            write_log('No cover found.')
            cover = None
        except urllib2.URLError, e:  # Python 2 except syntax
            write_log(u'Error: While retrieving cover %s - %s' % 
                (info['#cover-url'], unicode(e)))
            cover = None
    # NOTE(review): no return statement is visible in this excerpt; the
    # function presumably continues (returning info/tracks/cover) past it.
Example #4
0
    def search(self, artist, files=None):
        """Run a search against the configured source.

        artist -- keyword string used when no per-file override applies.
        files -- optional track list; with self.searchby set, keywords
                 are formatted from files[0] instead of *artist*.
        Returns [(info, [])] pairs from the search page, or, when no
        search source is configured, [album] / [] from a direct retrieve.
        """
        if files is not None and self.searchby:
            keywords = format_value(files[0], self.searchby)
        else:
            keywords = artist
        # FIX: raw string -- '\s' is an invalid escape in a plain literal
        # (SyntaxWarning on modern Python); the sibling copy of this
        # method already uses r'\s+'.
        keywords = re.sub(r'\s+', self._separator, keywords)

        if self.search_source is None:
            album = self.retrieve(keywords)
            return [album] if album else []

        url = self._search_base.replace(u'%s', keywords)

        write_log(translate('Mp3tag', u'Retrieving search page: %s') % url)
        set_status(translate('Mp3tag', u'Retrieving search page...'))
        # self.html lets callers inject a pre-fetched page (e.g. tests).
        if self.html is None:
            page = get_encoding(urlopen(url), True, 'utf8')[1]
        else:
            page = get_encoding(self.html, True, 'utf8')[1]

        write_log(translate('Mp3tag', u'Parsing search page.'))
        set_status(translate('Mp3tag', u'Parsing search page...'))
        infos = parse_search_page(self.indexformat, page, self.search_source, url)
        return [(info, []) for info in infos]
Example #5
0
def parse_searchpage(page, artist=None, album=None, id_field=ALBUM_ID):
    """Parse a search page and extract matching albums.

    Arguments:
    page -- html string with the search page's html.
    artist -- artist to check for in results. If matches exist, only
              results with that artist are returned.
    album -- album to check for in results. If matches exist, only
             results with that album are returned.
    id_field -- key to use for the album id found (unused in the
                visible code -- TODO confirm against the full file).

    Return a tuple with the first element == True if the list was
    truncated to only matching artists/albums.
    """
    page = get_encoding(page, True, 'utf8')[1]
    soup = parse_html.SoupWrapper(parse_html.parse(page))
    result_table = soup.find('ul', {'class': 'search-results'})
    try:
        results = result_table.find_all('div', {'class': 'info'})
    except AttributeError:
        # No result list on the page. Historical bare-list return kept
        # for callers that only iterate the result.
        return []

    albums = [parse_search_element(result) for result in results]

    if artist and album:
        d = {'artist': artist, 'album': album}
        top = [a for a in albums if equal(d, a, True)]
    elif album:
        d = {'album': album}
        top = [a for a in albums if equal(d, a, True, ['album'])]
        if not top:
            # Retry with relaxed (non-strict) comparison.
            top = [a for a in albums if equal(d, a, False, ['album'])]
    elif artist:
        d = {'artist': artist}
        top = [a for a in albums if equal(d, a, True, ['artist'])]
        if not top:  # FIX: original tested undefined name 'ret' (NameError)
            top = [a for a in albums if equal(d, a, False, ['artist'])]
    else:
        top = []

    # FIX: 'top' was computed and then discarded; return the truncated
    # match list when present, per the documented contract.
    if top:
        return True, top
    return False, albums
Example #6
0
def parse_searchpage(page, artist=None, album=None, id_field=ALBUM_ID):
    """Parse a search page and extract matching albums.

    Arguments:
    page -- html string with the search page's html.
    artist -- artist to check for in results. If matches exist, only
              results with that artist are returned.
    album -- album to check for in results. If matches exist, only
             results with that album are returned.
    id_field -- key to use for the album id found (unused in the
                visible code -- TODO confirm against the full file).

    Return a tuple with the first element == True if the list was
    truncated to only matching artists/albums.
    """
    page = get_encoding(page, True, 'utf8')[1]
    soup = parse_html.SoupWrapper(parse_html.parse(page))
    result_table = soup.find('ul', {'class': 'search-results'})
    try:
        results = result_table.find_all('div', {'class': 'info'})
    except AttributeError:
        # No result list on the page. Historical bare-list return kept
        # for callers that only iterate the result.
        return []

    albums = [parse_search_element(result) for result in results]

    if artist and album:
        d = {'artist': artist, 'album': album}
        top = [a for a in albums if equal(d, a, True)]
    elif album:
        d = {'album': album}
        top = [a for a in albums if equal(d, a, True, ['album'])]
        if not top:
            # Retry with relaxed (non-strict) comparison.
            top = [a for a in albums if equal(d, a, False, ['album'])]
    elif artist:
        d = {'artist': artist}
        top = [a for a in albums if equal(d, a, True, ['artist'])]
        if not top:  # FIX: original tested undefined name 'ret' (NameError)
            top = [a for a in albums if equal(d, a, False, ['artist'])]
    else:
        top = []

    # FIX: 'top' was computed and then discarded; return the truncated
    # match list when present, per the documented contract.
    if top:
        return True, top
    return False, albums
Example #7
0
    def retrieve(self, info):
        """Retrieve and parse an album page.

        info -- either a search string, or a dict carrying '#url'.
        Returns (info, tracks); tracks is None when the page yielded none.
        """
        if isinstance(info, str):
            text = info.replace(' ', self._separator)
            info = {}
        else:
            # Work on a copy so the caller's dict is never mutated.
            info = deepcopy(info)
            text = info['#url']

        try:
            url = self.album_url % text
        except TypeError:
            # album_url has no %s placeholder; append the text instead.
            url = self.album_url + text

        info['#url'] = url

        try:
            write_log(translate('Mp3tag', 'Retrieving album page: %s') % url)
            set_status(translate('Mp3tag', 'Retrieving album page...'))
            page = get_encoding(urlopen(url), True, 'utf8')[1]
        except Exception:
            # FIX: narrowed from a bare except (which also swallowed
            # KeyboardInterrupt/SystemExit). Best-effort: parse an empty page.
            page = ''

        write_log(translate('Mp3tag', 'Parsing album page.'))
        set_status(translate('Mp3tag', 'Parsing album page...'))
        new_info, tracks = parse_album_page(page, self.album_source, url)
        # Only keep truthy values so stale caller data isn't overwritten
        # with blanks.
        info.update((k, v) for k, v in new_info.items() if v)

        if self._get_cover and COVER in info:
            # FIX: read from info (the dict we just membership-tested);
            # new_info[COVER] raised KeyError when the cover key came
            # from the caller's dict.
            cover_url = info[COVER]
            if isinstance(cover_url, str):
                info.update(retrieve_cover(cover_url))
            else:
                # FIX: update with each retrieved cover dict in turn;
                # dict.update(list_of_dicts) raises ValueError.
                for single_url in cover_url:
                    info.update(retrieve_cover(single_url))
        if not tracks:
            tracks = None
        return info, tracks
Example #8
0
    def retrieve(self, info):
        """Retrieve and parse an album page (legacy Python 2 variant).

        info -- either a search string, or a dict carrying '#url'.
        Returns (info, tracks); tracks is None when the page yielded none.
        """
        if isinstance(info, basestring):
            text = info.replace(u' ', self._separator)
            info = {}
        else:
            # Work on a copy so the caller's dict is never mutated.
            info = deepcopy(info)
            text = info['#url']
        
        try:
            url = self.album_url % text
        except TypeError:
            # album_url has no %s placeholder; append the text instead.
            url = self.album_url + text
        
        info['#url'] = url

        try:
            write_log(translate('Mp3tag', u'Retrieving album page: %s') % url)
            set_status(translate('Mp3tag', u'Retrieving album page...'))
            page = get_encoding(urlopen(url), True, 'utf8')[1]
        except:
            # NOTE(review): bare except swallows everything, including
            # KeyboardInterrupt -- presumably deliberate best-effort, but
            # worth narrowing to Exception.
            page = u''

        write_log(translate('Mp3tag', u'Parsing album page.'))
        set_status(translate('Mp3tag', u'Parsing album page...'))
        new_info, tracks = parse_album_page(page, self.album_source, url)
        # Only keep truthy values so stale data isn't overwritten with blanks.
        info.update(dict((k,v) for k,v in new_info.iteritems() if v))
        
        if self._get_cover and COVER in info:
            # NOTE(review): membership is tested on info but the value is
            # read from new_info -- KeyError if the key only exists in the
            # caller's dict; confirm intent.
            cover_url = new_info[COVER]
            if isinstance(cover_url, basestring):
                info.update(retrieve_cover(cover_url))
            else:
                # NOTE(review): dict.update on a list of dicts raises
                # unless retrieve_cover returns key/value pairs -- verify.
                info.update(map(retrieve_cover, cover_url))
        if not tracks:
            tracks = None
        return info, tracks
Example #9
0
            # (continuation: the enclosing method's def and the handler
            # header for these two lines lie above this excerpt)
            set_status('Retrieving album.')
            write_log('Retrieving album.')
        write_log('Album URL - %s' % albuminfo['#albumurl'])
        url = albuminfo['#albumurl']
        try:
            # NOTE(review): both branches make the identical call, so
            # self._useid is effectively ignored here -- confirm whether
            # an id-based lookup was intended for the True branch.
            if self._useid:
                info, tracks, cover = retrieve_album(url, self._getcover)
            else:
                info, tracks, cover = retrieve_album(url, self._getcover)
        except urllib.error.URLError as e:
            write_log('Error: While retrieving album URL %s - %s' %
                      (url, str(e)))
            raise RetrievalError(str(e))
        if cover:
            # Merge cover data into the parsed album info.
            info.update(cover)
        # Copy so the caller's dict is not mutated in place.
        albuminfo = albuminfo.copy()
        albuminfo.update(info)
        return albuminfo, tracks

    def applyPrefs(self, args):
        """Apply user prefs: args[0] = fetch-cover flag, args[1] = use-id flag."""
        self._getcover, self._useid = args[0], args[1]


# Entry point the plugin host looks up.
info = AllMusic

if __name__ == '__main__':
    # FIX: close the input file deterministically instead of leaking the
    # handle returned by open().
    with open(sys.argv[1], 'r') as page_file:
        raw = page_file.read()
    f = get_encoding(raw, True)[1]
    x = parse_albumpage(f)
    print(x)
Example #10
0
        except KeyError:
            # (continuation: the matching try block lies above this excerpt)
            set_status('Retrieving album.')
            write_log('Retrieving album.')
        write_log('Album URL - %s' % albuminfo['#albumurl'])
        url = albuminfo['#albumurl']
        try:
            # NOTE(review): both branches make the identical call, so
            # self._useid is effectively ignored here -- confirm whether
            # an id-based lookup was intended for the True branch.
            if self._useid:
                info, tracks, cover = retrieve_album(url, self._getcover)
            else:
                info, tracks, cover = retrieve_album(url, self._getcover)
        except urllib2.URLError, e:  # Python 2 except syntax
            write_log(u'Error: While retrieving album URL %s - %s' % 
                (url, unicode(e)))
            raise RetrievalError(unicode(e))
        if cover:
            # Merge cover data into the parsed album info.
            info.update(cover)
        # Copy so the caller's dict is not mutated in place.
        albuminfo = albuminfo.copy()
        albuminfo.update(info)
        return albuminfo, tracks

    def applyPrefs(self, args):
        """Store the preference values supplied by the host.

        args -- sequence: [retrieve-cover flag, use-album-id flag].
        """
        cover_pref, id_pref = args[0], args[1]
        self._getcover = cover_pref
        self._useid = id_pref

# Entry point the plugin host looks up.
info = AllMusic

if __name__ == '__main__':
    # FIX: close the input file deterministically instead of leaking the
    # handle, and use print() (valid on both Python 2 and 3, consistent
    # with the sibling copy of this script tail).
    with open(sys.argv[1], 'r') as page_file:
        f = get_encoding(page_file.read(), True)[1]
    x = parse_albumpage(f)
    print(x)