Example #1
0
 def get_cover_data(self, cover_url):
     """
         Downloads the cover found at ``cover_url``.

         Returns the result of ``common.get_url_contents``, or None when
         no URL was given or the download fails with an IOError.
     """
     if cover_url:
         try:
             return common.get_url_contents(cover_url, self.user_agent)
         except IOError:
             pass
     return None
Example #2
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyricsmania.com.

            Returns a ``(lyrics, self.name, url)`` tuple.

            Raises LyricsNotFoundException when the artist or title tag is
            unavailable, the page cannot be downloaded or parsed, or the
            expected lyrics markup is missing.
        """
        try:
            (artist, title) = track.get_tag_raw('artist')[0].encode("utf-8"), \
                track.get_tag_raw('title')[0].encode("utf-8")
        except TypeError:
            # presumably get_tag_raw() returns None for a missing tag
            raise LyricsNotFoundException

        if not artist or not title:
            raise LyricsNotFoundException

        artist = artist.replace(' ', '_').replace('\'', '')
        title = title.replace(' ', '_').replace('\'', '')

        url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            # Exception instead of a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into "lyrics not found"
            raise LyricsNotFoundException

        try:
            lyrics_html = lxml.html.fromstring(html)
        except lxml.etree.XMLSyntaxError:
            raise LyricsNotFoundException

        try:
            lyrics_body = lyrics_html.find_class('lyrics-body')[0]
            lyrics_body.remove(lyrics_body.get_element_by_id('video-musictory'))
            # raw string: '\s' is not a valid escape in a normal literal
            lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
        except Exception:
            # any failure here means the page lacks the expected markup
            raise LyricsNotFoundException

        return (lyrics, self.name, url)
Example #3
0
    def load_wikipedia_page(self, track):
        """
            Fetches the mobile Wikipedia search page for the artist of
            ``track`` and hands it to ``self.load_html`` via GLib.idle_add.

            Does nothing unless ``track`` equals ``self.hometrack``. An
            unknown configured language falls back to 'en'. If the download
            fails with urllib2.URLError the error is logged and a short
            "not found" snippet is shown instead.
        """
        if track != self.hometrack:
            return

        display_name = track.get_tag_display('artist')

        lang = settings.get_option('plugin/wikipedia/language', 'en')
        if lang not in LANGUAGES:
            log.error('Provided language "%s" not found.' % lang)
            lang = 'en'

        # from here on the artist name is percent-encoded; this encoded
        # form is also what ends up in the messages below
        artist = urllib2.quote(display_name.encode('utf-8'), '')
        url = "https://%s.m.wikipedia.org/wiki/Special:Search/%s" % (lang, artist)

        try:
            html = common.get_url_contents(url, self.__user_agent)
        except urllib2.URLError as err:
            log.error(err)
            log.error("Error occurred when trying to retrieve Wikipedia page for %s." % artist)
            not_found = """
                <p style="color: red">No Wikipedia page found for <strong>%s</strong></p>
                """
            html = not_found % artist

        GLib.idle_add(self.load_html, html, url)
Example #4
0
 def get_cover_data(self, cover_url):
     """
         Retrieves the data behind ``cover_url``.

         Returns None if ``cover_url`` is empty or if fetching it raises
         an IOError; otherwise the result of ``common.get_url_contents``.
     """
     if cover_url:
         try:
             return common.get_url_contents(cover_url, self.user_agent)
         except IOError:
             pass
     return None
Example #5
0
    def load_wikipedia_page(self, track):
        """
            Loads the mobile Wikipedia search page for the artist of
            ``track`` and schedules ``self.load_html`` with the result.

            Returns immediately when ``track`` differs from
            ``self.hometrack``. A configured language that is not in
            LANGUAGES is replaced by 'en'. A urllib2.URLError during the
            download is logged and replaced by a "not found" snippet.
        """
        if track != self.hometrack:
            return

        display_name = track.get_tag_display('artist')

        lang = settings.get_option('plugin/wikipedia/language', 'en')
        if lang not in LANGUAGES:
            log.error('Provided language "%s" not found.' % lang)
            lang = 'en'

        # the percent-encoded name is used for the URL as well as for the
        # error messages below
        artist = urllib2.quote(display_name.encode('utf-8'), '')
        url = "https://%s.m.wikipedia.org/wiki/Special:Search/%s" % (lang, artist)

        try:
            html = common.get_url_contents(url, self.__user_agent)
        except urllib2.URLError as err:
            log.error(err)
            log.error("Error occurred when trying to retrieve Wikipedia page for %s." % artist)
            not_found = """
                <p style="color: red">No Wikipedia page found for <strong>%s</strong></p>
                """
            html = not_found % artist

        GLib.idle_add(self.load_html, html, url)
def find_books(keyword, user_agent):
    '''
        Searches for books matching ``keyword``.

        Returns a list of Book instances, with unknown chapters...
        An empty list is returned (and an error logged) when the request
        fails or the response is not well-formed XML.
    '''

    # urlencode the search string
    url = search_url + urllib.quote_plus(keyword)

    try:
        data = common.get_url_contents(url, user_agent)
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must not be reported as connection errors
        logger.error("LIBRIVOX: connection error")
        return []

    try:
        tree = ElementTree.XML(data)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return []

    books = []

    for elem in tree:
        if elem.tag == 'error':
            logger.error('LIBRIVOX: query error: %s' % elem.text)

        elif elem.tag == 'books':
            for bk in elem.findall('book'):
                title = bk.find("title").text
                rssurl = bk.find("url_rss").text
                books.append(Book(title, rssurl))

    return books
    def get_all(self):
        '''
            Unified function for getting chapters and info at the same
            time.

            Downloads and parses the feed at ``self.rssurl``, then fills
            ``self.chapters`` with ``[label, link]`` pairs and ``self.info``
            with the channel description. Does nothing if ``self.loaded``
            is already set. Connection and XML errors are logged and leave
            ``self.loaded`` untouched.
        '''
        if self.loaded:
            return

        try:
            self.xmldata = common.get_url_contents(self.rssurl, self.user_agent)
        except Exception:
            # Exception rather than a bare except: KeyboardInterrupt and
            # SystemExit must not be reported as connection errors
            logger.error("LIBRIVOX: Connection error")
            return

        try:
            self.xmltree = ElementTree.XML(self.xmldata)
        except Exception:
            logger.error("LIBRIVOX: XML error")
            return

        self.chapters = []
        items = self.xmltree.findall("channel/item")
        for item in items:
            title = item.find("title").text
            link = item.find("link").text

            # find() returns None when the item has no duration element at
            # all, so check the element before reading its text
            duration_elem = item.find("{http://www.itunes.com/dtds/podcast-1.0.dtd}duration")
            duration = None
            if duration_elem is not None:
                duration = duration_elem.text
            if duration is None:
                duration = 'Unknown length'

            link = link.replace("_64kb.mp3", ".ogg")
            self.chapters.append([title + " " + "(" + duration + ")", link])

        self.info = self.xmltree.find("channel/description")
        self.info = self.info.text
        self.loaded = True
        return
Example #8
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyricsmania.com.

            Returns a ``(lyrics, self.name, url)`` tuple.

            Raises LyricsNotFoundException when the artist or title tag is
            unavailable, the page cannot be downloaded or parsed, or the
            expected lyrics markup is missing.
        """
        try:
            (artist, title) = track.get_tag_raw('artist')[0].encode("utf-8"), \
                track.get_tag_raw('title')[0].encode("utf-8")
        except TypeError:
            # presumably get_tag_raw() returns None for a missing tag
            raise LyricsNotFoundException

        if not artist or not title:
            raise LyricsNotFoundException

        artist = artist.replace(' ', '_').replace('\'', '')
        title = title.replace(' ', '_').replace('\'', '')

        url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            # Exception instead of a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into "lyrics not found"
            raise LyricsNotFoundException

        try:
            lyrics_html = lxml.html.fromstring(html)
        except lxml.etree.XMLSyntaxError:
            raise LyricsNotFoundException

        try:
            lyrics_body = lyrics_html.find_class('lyrics-body')[0]
            lyrics_body.remove(
                lyrics_body.get_element_by_id('video-musictory'))
            # raw string: '\s' is not a valid escape in a normal literal
            lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
        except Exception:
            # any failure here means the page lacks the expected markup
            raise LyricsNotFoundException

        return (lyrics, self.name, url)
Example #9
0
def find_books(keyword, user_agent):
    '''
        Searches for books matching ``keyword``.

        Returns a list of Book instances, with unknown chapters...
        An empty list is returned (and an error logged) when the request
        fails or the response is not well-formed XML.
    '''

    # urlencode the search string
    url = search_url + urllib.quote_plus(keyword)

    try:
        data = common.get_url_contents(url, user_agent)
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must not be reported as connection errors
        logger.error("LIBRIVOX: connection error")
        return []

    try:
        tree = ElementTree.XML(data)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return []

    books = []

    for elem in tree:
        if elem.tag == 'error':
            logger.error('LIBRIVOX: query error: %s' % elem.text)

        elif elem.tag == 'books':
            for bk in elem.findall('book'):
                title = bk.find("title").text
                rssurl = bk.find("url_rss").text
                books.append(Book(title, rssurl))

    return books
Example #10
0
    def get_all(self):
        '''
            Unified function for getting chapters and info at the same
            time.

            Downloads and parses the feed at ``self.rssurl``, then fills
            ``self.chapters`` with ``[label, link]`` pairs and ``self.info``
            with the channel description. Does nothing if ``self.loaded``
            is already set. Connection and XML errors are logged and leave
            ``self.loaded`` untouched.
        '''
        if self.loaded:
            return

        try:
            self.xmldata = common.get_url_contents(self.rssurl, self.user_agent)
        except Exception:
            logger.error("LIBRIVOX: Connection error")
            return

        try:
            self.xmltree = ElementTree.XML(self.xmldata)
        except Exception:
            logger.error("LIBRIVOX: XML error")
            return

        self.chapters = []
        items = self.xmltree.findall("channel/item")
        for item in items:
            title = item.find("title").text
            link = item.find("link").text

            # find() returns None when the item has no duration element at
            # all, so check the element before reading its text
            duration_elem = item.find("{http://www.itunes.com/dtds/podcast-1.0.dtd}duration")
            duration = None
            if duration_elem is not None:
                duration = duration_elem.text
            if duration is None:
                duration = 'Unknown length'

            link = link.replace("_64kb.mp3", ".ogg")
            self.chapters.append([title + " " + "(" + duration + ")", link])

        self.info = self.xmltree.find("channel/description")
        self.info = self.info.text
        self.loaded = True
        return
Example #11
0
def search_covers(search, api_key, secret_key, user_agent):
    """
        Searches the Amazon web service for music covers matching
        ``search``.

        Returns a list with the URLs of all large images found in the
        response (possibly empty).

        Raises AmazonSearchError if the response contains a ``<Message>``
        element; the message is logged as a warning first.
    """
    params = {
        'Operation': 'ItemSearch',
        'Keywords': str(search),
        'AssociateTag': 'InvalidTag', # now required for AWS cover search API
        'Version': '2009-01-06',
        'SearchIndex': 'Music',
        'Service': 'AWSECommerceService',
        'ResponseGroup': 'ItemAttributes,Images',
        }

    query_string = get_aws_query_string(str(api_key).strip(),
        str(secret_key).strip(), params)

    # Fetch the result exactly once. The URL used to be requested a second
    # time through urllib2, with that response thrown away unread.
    data = common.get_url_contents(query_string, user_agent)

    # check for an error message
    m = re.search(r'<Message>(.*)</Message>', data, re.DOTALL)
    if m:
        logger.warning('Amazon Covers Search Error: %s' % m.group(1))
        raise AmazonSearchError(m.group(1))

    # check for large images
    regex = re.compile(r'<LargeImage><URL>([^<]*)', re.DOTALL)
    items = regex.findall(data)

    return items
Example #12
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyrics.wikia.com.

            Returns a ``(lyrics, self.name, url)`` tuple.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or it contains no element
            of class "lyricbox".
        """
        try:
            artist = track.get_tag_raw('artist')[0].encode("utf-8")
            title = track.get_tag_raw('title')[0].encode("utf-8")
        except TypeError:
            raise LyricsNotFoundException

        if not (artist and title):
            raise LyricsNotFoundException

        artist = urllib.quote(artist.replace(' ', '_'))
        title = urllib.quote(title.replace(' ', '_'))
        url = 'http://lyrics.wikia.com/wiki/%s:%s' % (artist, title)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            raise LyricsNotFoundException

        try:
            soup = BeautifulSoup.BeautifulSoup(html)
        except HTMLParser.HTMLParseError:
            raise LyricsNotFoundException

        boxes = soup.findAll(attrs={"class": "lyricbox"})
        if not boxes:
            raise LyricsNotFoundException

        # turn line breaks into newlines, drop divs and blank-line runs
        markup = boxes[0].renderContents().replace('<br />', '\n')
        markup = self.remove_div(markup).replace('\n\n\n', '')
        text = '\n'.join(markup.split('\n'))
        lyrics = re.sub(r' Send.*?Ringtone to your Cell ', '', text)

        lyrics = self.remove_script(lyrics)
        # let BeautifulStoneSoup resolve the HTML entities
        entity_mode = BeautifulSoup.BeautifulStoneSoup.HTML_ENTITIES
        decoded = BeautifulSoup.BeautifulStoneSoup(lyrics, convertEntities=entity_mode)
        lyrics = self.remove_html_tags(unicode(decoded))

        return (lyrics, self.name, url)
Example #13
0
    def get_cover_data(self, db_string):
        """
            Get the image data

            ``db_string`` must have the form ``<mbid>:<size>``. Returns
            the result of ``common.get_url_contents``, or None if the
            request fails with urllib2.HTTPError.
        """
        mbid, size = db_string.split(':')
        url = self.__caa_url.format(mbid=mbid, size=size)

        try:
            logger.debug('Fetching cover from {url}'.format(url=url))
            return common.get_url_contents(url, self.user_agent)
        except urllib2.HTTPError:
            return None
Example #14
0
    def get_cover_data(self, db_string):
        """
            Get the image data

            ``db_string`` is split at ':' into an mbid and a size, which
            are filled into the cover URL template. Returns the downloaded
            data, or None if the request fails with urllib2.HTTPError.
        """
        mbid, size = db_string.split(':')
        url = self.__caa_url.format(mbid=mbid, size=size)

        try:
            logger.debug('Fetching cover from {url}'.format(url=url))
            return common.get_url_contents(url, self.user_agent)
        except urllib2.HTTPError:
            return None
Example #15
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyricsmania.com.

            Returns a ``(lyrics, self.name, url)`` tuple; lyrics obtained
            as bytes are decoded as UTF-8 first.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or extracting the lyrics
            from it fails.
        """
        try:
            artist = track.get_tag_raw('artist')[0]
            title = track.get_tag_raw('title')[0]
        except TypeError:
            raise LyricsNotFoundException

        if not (artist and title):
            raise LyricsNotFoundException

        # the site uses lowercase names with underscores and no apostrophes
        artist = artist.replace(' ', '_').replace('\'', '').lower()
        title = title.replace(' ', '_').replace('\'', '').lower()
        url = 'https://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            raise LyricsNotFoundException

        try:
            document = lxml.html.fromstring(html)
        except lxml.etree.XMLSyntaxError:
            raise LyricsNotFoundException

        try:
            body = document.find_class('lyrics-body')[0]
            body.remove(body.get_element_by_id('video-musictory'))
            text = body.text_content()
            lyrics = re.sub(r'^\s+Lyrics to .+', '', text).replace('\t', '')
            lyrics = self.remove_html_tags(self.remove_script(lyrics))
        except Exception:
            raise LyricsNotFoundException

        # We end up with unicode in some systems, str (bytes) in others;
        # no idea why and which one is correct.
        if isinstance(lyrics, bytes):
            lyrics = lyrics.decode('utf-8', errors='replace')
        return (lyrics, self.name, url)
Example #16
0
    def find_covers(self, track, limit=-1):
        """
            Searches last.fm for album covers

            The album search is tried first, then the track search. Returns
            a one-element list with the URL of the first 'extralarge' image
            of a result whose artist matches, or an empty list. A failed
            download (IOError) ends the search right away. ``limit`` is
            not used.
        """
        # TODO: handle multi-valued fields better
        try:
            artist = track.get_tag_raw('artist')[0]
            album = track.get_tag_raw('album')[0]
            title = track.get_tag_raw('title')[0]
        except TypeError:
            return []

        if not (artist and album and title):
            return []

        for kind, term in (('album', album), ('track', title)):
            url = self.url.format(
                api_rurl=API_ROOT_URL,
                type=kind,
                value=quote_plus(term.encode("utf-8")),
                api_key=API_KEY,
            )
            try:
                data = common.get_url_contents(url, self.user_agent)
            except IOError:
                return []

            try:
                payload = json.loads(data)
            except ValueError:
                continue

            try:
                for match in payload['results']["%smatches" % kind][kind]:
                    if match['artist'] != artist.encode("utf-8"):
                        continue
                    for image in match['image']:
                        if image['size'] == 'extralarge':
                            return [image['#text']]
            except KeyError:
                # unexpected response layout; try the next search type
                continue

        return []
Example #17
0
    def find_covers(self, track, limit=-1):
        """
            Searches last.fm for album covers

            Queries the album search and then the track search. The result
            is a one-element list holding the URL of the first 'extralarge'
            image of a matching artist, or an empty list when nothing is
            found. An IOError while downloading aborts the search.
            ``limit`` is not used.
        """
        # TODO: handle multi-valued fields better
        try:
            artist = track.get_tag_raw('artist')[0]
            album = track.get_tag_raw('album')[0]
            title = track.get_tag_raw('title')[0]
        except TypeError:
            return []

        if not (artist and album and title):
            return []

        for kind, term in (('album', album), ('track', title)):
            url = self.url.format(
                api_rurl=API_ROOT_URL,
                type=kind,
                value=quote_plus(term.encode("utf-8")),
                api_key=API_KEY,
            )
            try:
                data = common.get_url_contents(url, self.user_agent)
            except IOError:
                return []

            try:
                payload = json.loads(data)
            except ValueError:
                continue

            try:
                for match in payload['results']["%smatches" % kind][kind]:
                    if match['artist'] != artist.encode("utf-8"):
                        continue
                    for image in match['image']:
                        if image['size'] == 'extralarge':
                            return [image['#text']]
            except KeyError:
                # unexpected response layout; try the next search type
                continue

        return []
Example #18
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyrics.wikia.com.

            Returns a ``(lyrics, self.name, url)`` tuple.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or it contains no element
            of class "lyricbox".
        """
        try:
            (artist, title) = track.get_tag_raw('artist')[0].encode("utf-8"), \
                track.get_tag_raw('title')[0].encode("utf-8")
        except TypeError:
            # presumably get_tag_raw() returns None for a missing tag
            raise LyricsNotFoundException

        if not artist or not title:
            raise LyricsNotFoundException

        artist = urllib.quote(artist.replace(' ', '_'))
        title = urllib.quote(title.replace(' ', '_'))

        url = 'http://lyrics.wikia.com/wiki/%s:%s' % (artist, title)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            # Exception instead of a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into "lyrics not found"
            raise LyricsNotFoundException

        try:
            soup = BeautifulSoup.BeautifulSoup(html)
        except HTMLParser.HTMLParseError:
            raise LyricsNotFoundException
        lyrics = soup.findAll(attrs={"class": "lyricbox"})
        if lyrics:
            content = lyrics[0].renderContents().replace('<br />', '\n')
            lines = self.remove_div(content).replace('\n\n\n', '').split('\n')
            # the last seven lines are cut off
            # NOTE(review): presumably trailing non-lyrics markup - confirm
            lyrics = re.sub(
                r' Send.*?Ringtone to your Cell ', '', '\n'.join(lines[0:-7]))
        else:
            raise LyricsNotFoundException

        lyrics = self.remove_script(lyrics)
        # let BeautifulStoneSoup resolve the HTML entities
        lyrics = self.remove_html_tags(
            unicode(
                BeautifulSoup.BeautifulStoneSoup(
                    lyrics,
                    convertEntities=BeautifulSoup.BeautifulStoneSoup.
                    HTML_ENTITIES)))

        return (lyrics, self.name, url)
Example #19
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyricsmania.com.

            Returns a ``(lyrics, self.name, url)`` tuple; lyrics obtained
            as bytes are decoded as UTF-8 first.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or extracting the lyrics
            from it fails.
        """
        try:
            (artist, title) = (
                track.get_tag_raw('artist')[0].encode("utf-8"),
                track.get_tag_raw('title')[0].encode("utf-8"),
            )
        except TypeError:
            # presumably get_tag_raw() returns None for a missing tag
            raise LyricsNotFoundException

        if not artist or not title:
            raise LyricsNotFoundException

        # the site uses lowercase names with underscores and no apostrophes
        artist = artist.replace(' ', '_').replace('\'', '').lower()
        title = title.replace(' ', '_').replace('\'', '').lower()

        url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            raise LyricsNotFoundException

        try:
            lyrics_html = lxml.html.fromstring(html)
        except lxml.etree.XMLSyntaxError:
            raise LyricsNotFoundException

        try:
            lyrics_body = lyrics_html.find_class('lyrics-body')[0]
            lyrics_body.remove(lyrics_body.get_element_by_id('video-musictory'))
            # raw string: '\s' is not a valid escape in a normal literal
            lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
            lyrics = lyrics.replace('\t', '')
            lyrics = self.remove_script(lyrics)
            lyrics = self.remove_html_tags(lyrics)
        except Exception:
            # any failure here means the page lacks the expected markup
            raise LyricsNotFoundException

        # We end up with unicode in some systems, str (bytes) in others;
        # no idea why and which one is correct.
        if isinstance(lyrics, bytes):
            lyrics = lyrics.decode('utf-8', errors='replace')
        return (lyrics, self.name, url)
Example #20
0
    def find_covers(self, track, limit=-1):
        """
            Searches last.fm for album covers

            The album search is tried first, then the track search. Returns
            a one-element list with the URL of the first non-empty
            'extralarge' image of a result whose artist matches, or an
            empty list. Download and XML parsing failures skip to the next
            search type. ``limit`` is not used.
        """
        # TODO: handle multi-valued fields better
        try:
            artist = track.get_tag_raw('artist')[0]
            album = track.get_tag_raw('album')[0]
            title = track.get_tag_raw('title')[0]
        except TypeError:
            return []

        if not (artist and album and title):
            return []

        for kind, term in (('album', album), ('track', title)):
            url = self.url.format(
                type=kind,
                value=quote_plus(term.encode("utf-8")),
                api_key=API_KEY,
            )
            try:
                data = common.get_url_contents(url, self.user_agent)
            except IOError:
                continue

            try:
                xml = ETree.fromstring(data)
            except SyntaxError:
                continue

            for element in xml.getiterator(kind):
                if element.find('artist').text != artist.encode("utf-8"):
                    continue
                for image in element.findall('image'):
                    if image.attrib['size'] != 'extralarge':
                        continue
                    cover_url = image.text
                    if cover_url:
                        return [cover_url]

        return []
Example #21
0
    def find_covers(self, track, limit=-1):
        """
            Searches last.fm for album covers

            Queries the album search and then the track search. The result
            is a one-element list holding the URL of the first non-empty
            'extralarge' image of a matching artist, or an empty list.
            Download and XML parsing failures skip to the next search
            type. ``limit`` is not used.
        """
        # TODO: handle multi-valued fields better
        try:
            artist = track.get_tag_raw('artist')[0]
            album = track.get_tag_raw('album')[0]
            title = track.get_tag_raw('title')[0]
        except TypeError:
            return []

        if not (artist and album and title):
            return []

        for kind, term in (('album', album), ('track', title)):
            url = self.url.format(
                type=kind,
                value=quote_plus(term.encode("utf-8")),
                api_key=API_KEY,
            )
            try:
                data = common.get_url_contents(url, self.user_agent)
            except IOError:
                continue

            try:
                xml = ETree.fromstring(data)
            except SyntaxError:
                continue

            for element in xml.getiterator(kind):
                if element.find('artist').text != artist.encode("utf-8"):
                    continue
                for image in element.findall('image'):
                    if image.attrib['size'] != 'extralarge':
                        continue
                    cover_url = image.text
                    if cover_url:
                        return [cover_url]

        return []
Example #22
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyrics.fandom.com.

            Returns a ``(lyrics, self.name, url)`` tuple.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or it contains no element
            of class "lyricbox".
        """
        try:
            artist = track.get_tag_raw('artist')[0].encode("utf-8")
            title = track.get_tag_raw('title')[0].encode("utf-8")
        except TypeError:
            raise LyricsNotFoundException

        if not (artist and title):
            raise LyricsNotFoundException

        artist = urllib.quote(artist.replace(' ', '_'))
        title = urllib.quote(title.replace(' ', '_'))
        url = 'https://lyrics.fandom.com/wiki/%s:%s' % (artist, title)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            raise LyricsNotFoundException

        try:
            soup = BeautifulSoup(html, "lxml")
        except HTMLParser.HTMLParseError:
            raise LyricsNotFoundException

        boxes = soup.findAll(attrs={"class": "lyricbox"})
        if not boxes:
            raise LyricsNotFoundException

        # turn line breaks into newlines, drop divs and blank-line runs
        with_div = boxes[0].renderContents().replace('<br />', '\n')
        without_div = self.remove_div(with_div).replace('\n\n\n', '')
        text = '\n'.join(without_div.split('\n'))
        lyrics = re.sub(r' Send.*?Ringtone to your Cell ', '', text)

        lyrics = self.remove_script(lyrics)
        reparsed = BeautifulSoup(lyrics, "lxml")
        lyrics = self.remove_html_tags(unicode(reparsed))

        return (lyrics, self.name, url)
Example #23
0
    def find_lyrics(self, track):
        """
            Looks up the lyrics of ``track`` on lyricsmania.com.

            Returns a ``(lyrics, self.name, url)`` tuple; lyrics obtained
            as bytes are decoded as UTF-8 first.

            Raises LyricsNotFoundException when a tag is unavailable, the
            page cannot be downloaded or parsed, or the expected lyrics
            markup is missing.
        """
        try:
            (artist, title) = (
                track.get_tag_raw("artist")[0].encode("utf-8"),
                track.get_tag_raw("title")[0].encode("utf-8"),
            )
        except TypeError:
            # presumably get_tag_raw() returns None for a missing tag
            raise LyricsNotFoundException

        if not artist or not title:
            raise LyricsNotFoundException

        # the site uses lowercase names with underscores and no apostrophes
        artist = artist.replace(" ", "_").replace("'", "").lower()
        title = title.replace(" ", "_").replace("'", "").lower()

        url = "http://www.lyricsmania.com/%s_lyrics_%s.html" % (title, artist)

        try:
            html = common.get_url_contents(url, self.user_agent)
        except Exception:
            # Exception instead of a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into "lyrics not found"
            raise LyricsNotFoundException

        try:
            lyrics_html = lxml.html.fromstring(html)
        except lxml.etree.XMLSyntaxError:
            raise LyricsNotFoundException

        try:
            lyrics_body = lyrics_html.find_class("lyrics-body")[0]
            lyrics_body.remove(lyrics_body.get_element_by_id("video-musictory"))
            # raw string: "\s" is not a valid escape in a normal literal
            lyrics = re.sub(r"^\s+Lyrics to .+", "", lyrics_body.text_content())
        except Exception:
            # any failure here means the page lacks the expected markup
            raise LyricsNotFoundException

        # We end up with unicode in some systems, str (bytes) in others;
        # no idea why and which one is correct.
        if isinstance(lyrics, bytes):
            lyrics = lyrics.decode("utf-8", errors="replace")
        return (lyrics, self.name, url)
Example #24
0
 def get_cover_data(self, url):
     """
         Returns the result of ``common.get_url_contents`` for ``url``,
         requested with the module-level USER_AGENT.
     """
     cover_data = common.get_url_contents(url, USER_AGENT)
     return cover_data
Example #25
0
 def get_cover_data(self, url):
     """
         Returns the result of ``common.get_url_contents`` for ``url``,
         requested with ``self.user_agent``.
     """
     cover_data = common.get_url_contents(url, self.user_agent)
     return cover_data
Example #26
0
 def get_cover_data(self, url):
     """
         Fetches ``url`` through ``common.get_url_contents`` using
         ``self.user_agent`` and returns the result unchanged.
     """
     cover_data = common.get_url_contents(url, self.user_agent)
     return cover_data
Example #27
0
def get_json(url):
    """
        Fetches ``url`` with the module-level USER_AGENT and returns the
        response parsed by ``json.loads``.
    """
    raw = common.get_url_contents(url, USER_AGENT)
    return json.loads(raw)
Example #28
0
 def get_cover_data(self, url):
     """
         Fetches ``url`` through ``common.get_url_contents`` using the
         module-level USER_AGENT and returns the result unchanged.
     """
     cover_data = common.get_url_contents(url, USER_AGENT)
     return cover_data
Example #29
0
def get_json(url):
    """
        Downloads ``url`` (sending the module-level USER_AGENT) and
        returns the ``json.loads`` result for the response.
    """
    raw = common.get_url_contents(url, USER_AGENT)
    return json.loads(raw)