def get_cover_data(self, cover_url):
    """
    Fetch the contents found at ``cover_url``.

    :param cover_url: URL passed on to ``common.get_url_contents``
    :returns: the value returned by ``common.get_url_contents``, or None
        when ``cover_url`` is falsy or the fetch raises IOError
    """
    data = None
    if cover_url:
        try:
            data = common.get_url_contents(cover_url, self.user_agent)
        except IOError:
            # A failed fetch is reported the same way as a missing URL.
            data = None
    return data
def find_lyrics(self, track):
    """
    Look up the lyrics for ``track`` on lyricsmania.com.

    :param track: object whose ``get_tag_raw`` supplies the 'artist'
        and 'title' tag values
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: when a tag is missing or empty, the
        page cannot be fetched or parsed, or the expected elements are
        not present in the page
    """
    try:
        artist = track.get_tag_raw('artist')[0].encode("utf-8")
        title = track.get_tag_raw('title')[0].encode("utf-8")
    except TypeError:
        # get_tag_raw returned something that cannot be indexed
        raise LyricsNotFoundException

    if not artist or not title:
        raise LyricsNotFoundException

    artist = artist.replace(' ', '_').replace('\'', '')
    title = title.replace(' ', '_').replace('\'', '')

    url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not converted into "lyrics not found".
        raise LyricsNotFoundException

    try:
        lyrics_html = lxml.html.fromstring(html)
    except lxml.etree.XMLSyntaxError:
        raise LyricsNotFoundException

    try:
        lyrics_body = lyrics_html.find_class('lyrics-body')[0]
        lyrics_body.remove(lyrics_body.get_element_by_id('video-musictory'))
        # Drop the leading "Lyrics to ..." line from the text.
        lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
    except Exception:
        raise LyricsNotFoundException

    return (lyrics, self.name, url)
def load_wikipedia_page(self, track):
    """
    Fetch the mobile Wikipedia search page for the artist of ``track``
    and hand it to ``self.load_html`` through ``GLib.idle_add``.

    Returns immediately when ``track`` differs from ``self.hometrack``.
    The language comes from the 'plugin/wikipedia/language' option and
    falls back to 'en' when it is not listed in ``LANGUAGES``. If the
    fetch raises ``urllib2.URLError`` the error is logged and a short
    error snippet is loaded instead of the page.
    """
    if track != self.hometrack:
        return

    artist_name = track.get_tag_display('artist')

    language = settings.get_option('plugin/wikipedia/language', 'en')
    if language not in LANGUAGES:
        log.error('Provided language "%s" not found.' % language)
        language = 'en'

    quoted_artist = urllib2.quote(artist_name.encode('utf-8'), '')
    url = "https://%s.m.wikipedia.org/wiki/Special:Search/%s" % (
        language,
        quoted_artist,
    )

    try:
        page = common.get_url_contents(url, self.__user_agent)
    except urllib2.URLError as e:
        log.error(e)
        log.error(
            "Error occurred when trying to retrieve Wikipedia page "
            "for %s." % quoted_artist
        )
        page = """ <p style="color: red">No Wikipedia page found for <strong>%s</strong></p> """ % quoted_artist

    GLib.idle_add(self.load_html, page, url)
def load_wikipedia_page(self, track):
    """
    Load the Wikipedia search result for the artist of ``track``.

    Nothing happens unless ``track`` equals ``self.hometrack``. The page
    is requested from the mobile site of the configured language ('en'
    when the configured one is not in ``LANGUAGES``); the resulting
    HTML, or an error snippet when ``urllib2.URLError`` is raised, is
    scheduled for display with ``GLib.idle_add(self.load_html, ...)``.
    """
    if track != self.hometrack:
        return

    display_artist = track.get_tag_display('artist')

    lang = settings.get_option('plugin/wikipedia/language', 'en')
    if lang not in LANGUAGES:
        log.error('Provided language "%s" not found.' % lang)
        lang = 'en'

    # Quote with no "safe" characters
    artist = urllib2.quote(display_artist.encode('utf-8'), '')
    url = "https://%s.m.wikipedia.org/wiki/Special:Search/%s" % (lang, artist)

    try:
        html = common.get_url_contents(url, self.__user_agent)
    except urllib2.URLError as e:
        log.error(e)
        message = ("Error occurred when trying to retrieve Wikipedia page "
                   "for %s." % artist)
        log.error(message)
        html = """ <p style="color: red">No Wikipedia page found for <strong>%s</strong></p> """ % artist

    GLib.idle_add(self.load_html, html, url)
def find_books(keyword, user_agent):
    '''
        Returns a list of Book instances, with unknown chapters...

        :param keyword: search string; it is url-encoded and appended
            to ``search_url``
        :param user_agent: passed to ``common.get_url_contents``
        :returns: list of Book objects built from the title and RSS url
            of every 'book' element; an empty list when the fetch or
            the XML parse fails
    '''
    # urlencode the search string
    url = search_url + urllib.quote_plus(keyword)

    try:
        data = common.get_url_contents(url, user_agent)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not reported as a connection error.
        logger.error("LIBRIVOX: connection error")
        return []

    try:
        tree = ElementTree.XML(data)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return []

    books = []
    for elem in tree:
        if elem.tag == 'error':
            logger.error('LIBRIVOX: query error: %s' % elem.text)
        elif elem.tag == 'books':
            for bk in elem.findall('book'):
                title = bk.find("title").text
                rssurl = bk.find("url_rss").text
                books.append(Book(title, rssurl))

    return books
def get_all(self):
    '''
        Unified function for getting chapters and info at the same
        time.

        Fetches ``self.rssurl``, parses it as XML and fills
        ``self.chapters`` with ``[label, link]`` pairs and ``self.info``
        with the channel description text. Does nothing when
        ``self.loaded`` is already set; on a fetch or parse failure the
        error is logged and ``self.loaded`` stays unset.
    '''
    if self.loaded:
        return

    try:
        self.xmldata = common.get_url_contents(self.rssurl, self.user_agent)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not reported as a connection error.
        logger.error("LIBRIVOX: Connection error")
        return

    try:
        self.xmltree = ElementTree.XML(self.xmldata)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return

    duration_tag = "{http://www.itunes.com/dtds/podcast-1.0.dtd}duration"

    self.chapters = []
    for item in self.xmltree.findall("channel/item"):
        title = item.find("title").text
        link = item.find("link").text

        # find() returns None when the item has no duration element at
        # all; treat that like an element without text. Compare against
        # None explicitly: a childless Element is falsy.
        duration_elem = item.find(duration_tag)
        duration = None if duration_elem is None else duration_elem.text
        if duration is None:
            duration = 'Unknown length'

        link = link.replace("_64kb.mp3", ".ogg")
        self.chapters.append([title + " " + "(" + duration + ")", link])

    self.info = self.xmltree.find("channel/description")
    self.info = self.info.text
    self.loaded = True
    return
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyricsmania.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if either tag is missing or empty,
        or if fetching, parsing or extracting the lyrics fails
    """
    try:
        (artist, title) = (
            track.get_tag_raw('artist')[0].encode("utf-8"),
            track.get_tag_raw('title')[0].encode("utf-8"),
        )
    except TypeError:
        raise LyricsNotFoundException

    if not artist or not title:
        raise LyricsNotFoundException

    artist = artist.replace(' ', '_').replace('\'', '')
    title = title.replace(' ', '_').replace('\'', '')

    url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        raise LyricsNotFoundException

    try:
        lyrics_html = lxml.html.fromstring(html)
    except lxml.etree.XMLSyntaxError:
        raise LyricsNotFoundException

    try:
        lyrics_body = lyrics_html.find_class('lyrics-body')[0]
        lyrics_body.remove(
            lyrics_body.get_element_by_id('video-musictory'))
        # Raw string: '\s' is not a valid escape in a normal literal.
        lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
    except Exception:
        raise LyricsNotFoundException

    return (lyrics, self.name, url)
def find_books(keyword, user_agent):
    '''
        Returns a list of Book instances, with unknown chapters...

        The search url is ``search_url`` followed by the url-encoded
        ``keyword``. When the request or the XML parse fails, the
        failure is logged and an empty list is returned.
    '''
    # urlencode the search string
    url = search_url + urllib.quote_plus(keyword)

    try:
        data = common.get_url_contents(url, user_agent)
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit still propagate.
        logger.error("LIBRIVOX: connection error")
        return []

    try:
        tree = ElementTree.XML(data)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return []

    books = []
    for elem in tree:
        if elem.tag == 'error':
            logger.error('LIBRIVOX: query error: %s' % elem.text)
        elif elem.tag == 'books':
            for bk in elem.findall('book'):
                title = bk.find("title").text
                rssurl = bk.find("url_rss").text
                book = Book(title, rssurl)
                books.append(book)

    return books
def get_all(self):
    '''
        Unified function for getting chapters and info at the same
        time.

        Returns at once when ``self.loaded`` is set. Otherwise the RSS
        data at ``self.rssurl`` is fetched and parsed; every
        'channel/item' becomes a ``["<title> (<duration>)", link]``
        entry of ``self.chapters`` and the text of
        'channel/description' is stored in ``self.info``. Fetch and
        parse failures are logged and leave ``self.loaded`` unset.
    '''
    if self.loaded:
        return

    try:
        self.xmldata = common.get_url_contents(self.rssurl, self.user_agent)
    except Exception:
        logger.error("LIBRIVOX: Connection error")
        return

    try:
        self.xmltree = ElementTree.XML(self.xmldata)
    except Exception:
        logger.error("LIBRIVOX: XML error")
        return

    self.chapters = []
    items = self.xmltree.findall("channel/item")
    for item in items:
        title = item.find("title").text
        link = item.find("link").text

        # An item may lack the duration element entirely, in which case
        # find() yields None; handle it like an element with no text.
        # The check is against None on purpose, since an Element without
        # children evaluates as false.
        duration_elem = item.find(
            "{http://www.itunes.com/dtds/podcast-1.0.dtd}duration")
        if duration_elem is not None:
            duration = duration_elem.text
        else:
            duration = None
        if duration is None:
            duration = 'Unknown length'

        link = link.replace("_64kb.mp3", ".ogg")
        self.chapters.append([title + " " + "(" + duration + ")", link])

    self.info = self.xmltree.find("channel/description")
    self.info = self.info.text
    self.loaded = True
    return
def search_covers(search, api_key, secret_key, user_agent):
    """
    Run an 'ItemSearch' in the 'Music' index and collect the large
    image URLs found in the response.

    :param search: keywords to search for (converted with ``str``)
    :param api_key: key passed, stripped, to ``get_aws_query_string``
    :param secret_key: secret passed, stripped, to
        ``get_aws_query_string``
    :param user_agent: passed to ``common.get_url_contents``
    :returns: list of the URLs found inside ``<LargeImage><URL>``
    :raises AmazonSearchError: when the response contains a
        ``<Message>`` element; its content is the error text
    """
    params = {
        'Operation': 'ItemSearch',
        'Keywords': str(search),
        'AssociateTag': 'InvalidTag',  # now required for AWS cover search API
        'Version': '2009-01-06',
        'SearchIndex': 'Music',
        'Service': 'AWSECommerceService',
        'ResponseGroup': 'ItemAttributes,Images',
    }

    query_string = get_aws_query_string(
        str(api_key).strip(), str(secret_key).strip(), params
    )

    # The query is fetched exactly once. Previously it was also requested
    # through urllib2 first and that response was thrown away.
    data = common.get_url_contents(query_string, user_agent)

    # check for an error message
    m = re.search(r'<Message>(.*)</Message>', data, re.DOTALL)
    if m:
        logger.warning('Amazon Covers Search Error: %s' % m.group(1))
        raise AmazonSearchError(m.group(1))

    # check for large images
    regex = re.compile(r'<LargeImage><URL>([^<]*)', re.DOTALL)
    return regex.findall(data)
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyrics.wikia.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, the
        page cannot be fetched or parsed, or it has no element of class
        'lyricbox'
    """
    try:
        artist = track.get_tag_raw('artist')[0].encode("utf-8")
        title = track.get_tag_raw('title')[0].encode("utf-8")
    except TypeError:
        raise LyricsNotFoundException

    if not (artist and title):
        raise LyricsNotFoundException

    artist = urllib.quote(artist.replace(' ', '_'))
    title = urllib.quote(title.replace(' ', '_'))
    url = 'http://lyrics.wikia.com/wiki/%s:%s' % (artist, title)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        raise LyricsNotFoundException

    try:
        soup = BeautifulSoup.BeautifulSoup(html)
    except HTMLParser.HTMLParseError:
        raise LyricsNotFoundException

    boxes = soup.findAll(attrs={"class": "lyricbox"})
    if not boxes:
        raise LyricsNotFoundException

    # Turn <br /> into newlines, remove divs, collapse triple newlines.
    rendered = boxes[0].renderContents().replace('<br />', '\n')
    without_div = self.remove_div(rendered).replace('\n\n\n', '')
    text = '\n'.join(without_div.split('\n'))
    lyrics = re.sub(r' Send.*?Ringtone to your Cell ', '', text)

    lyrics = self.remove_script(lyrics)
    entity_soup = BeautifulSoup.BeautifulStoneSoup(
        lyrics,
        convertEntities=BeautifulSoup.BeautifulStoneSoup.HTML_ENTITIES,
    )
    lyrics = self.remove_html_tags(unicode(entity_soup))

    return (lyrics, self.name, url)
def get_cover_data(self, db_string):
    """
    Get the image data

    :param db_string: string of the form ``<mbid>:<size>``; both parts
        are substituted into ``self.__caa_url``
    :returns: the value returned by ``common.get_url_contents`` for the
        resulting url, or None when ``urllib2.HTTPError`` is raised
    """
    mbid, size = db_string.split(':')
    url = self.__caa_url.format(mbid=mbid, size=size)

    logger.debug('Fetching cover from {url}'.format(url=url))
    try:
        return common.get_url_contents(url, self.user_agent)
    except urllib2.HTTPError:
        return None
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyricsmania.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, or if
        fetching, parsing or extracting the lyrics fails
    """
    try:
        artist = track.get_tag_raw('artist')[0]
        title = track.get_tag_raw('title')[0]
    except TypeError:
        raise LyricsNotFoundException

    if not (artist and title):
        raise LyricsNotFoundException

    # The url uses lower-cased names with underscores and no apostrophes.
    artist_slug = artist.replace(' ', '_').replace('\'', '').lower()
    title_slug = title.replace(' ', '_').replace('\'', '').lower()
    url = 'https://www.lyricsmania.com/%s_lyrics_%s.html' % (
        title_slug,
        artist_slug,
    )

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        raise LyricsNotFoundException

    try:
        document = lxml.html.fromstring(html)
    except lxml.etree.XMLSyntaxError:
        raise LyricsNotFoundException

    try:
        body = document.find_class('lyrics-body')[0]
        video = body.get_element_by_id('video-musictory')
        body.remove(video)
        text = body.text_content()
        lyrics = re.sub(r'^\s+Lyrics to .+', '', text)
        lyrics = lyrics.replace('\t', '')
        lyrics = self.remove_script(lyrics)
        lyrics = self.remove_html_tags(lyrics)
    except Exception:
        raise LyricsNotFoundException

    # We end up with unicode in some systems, str (bytes) in others;
    # no idea why and which one is correct.
    if isinstance(lyrics, bytes):
        lyrics = lyrics.decode('utf-8', errors='replace')

    return (lyrics, self.name, url)
def find_covers(self, track, limit=-1):
    """
    Searches last.fm for album covers

    An 'album' search is tried first, then a 'track' search. The first
    result whose artist equals the track's artist and which has an
    'extralarge' image wins.

    :param track: object providing ``get_tag_raw`` for the 'artist',
        'album' and 'title' tags
    :param limit: not used by this method
    :returns: list holding the single image url found, or an empty list
        when a tag is missing or empty, the request raises IOError, or
        nothing matches
    """
    # TODO: handle multi-valued fields better
    try:
        (artist, album, title) = (
            track.get_tag_raw('artist')[0],
            track.get_tag_raw('album')[0],
            track.get_tag_raw('title')[0],
        )
    except TypeError:
        return []

    if not artist or not album or not title:
        return []

    # "kind" instead of "type" so the builtin is not shadowed; the
    # format keyword itself must stay "type".
    for kind, value in (('album', album), ('track', title)):
        url = self.url.format(
            api_rurl=API_ROOT_URL,
            type=kind,
            value=quote_plus(value.encode("utf-8")),
            api_key=API_KEY,
        )

        try:
            data = common.get_url_contents(url, self.user_agent)
        except IOError:
            return []

        try:
            data_json = json.loads(data)
        except ValueError:
            continue

        try:
            for element in data_json['results']["%smatches" % kind][kind]:
                # json.loads yields text strings, so compare against the
                # text artist. Comparing with the utf-8 encoded bytes
                # never matches artists with non-ASCII characters.
                if element['artist'] == artist:
                    for image in element['image']:
                        if image['size'] == 'extralarge':
                            return [image['#text']]
        except KeyError:
            continue

    return []
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyrics.wikia.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, the
        page cannot be fetched or parsed, or it has no element of class
        'lyricbox'
    """
    try:
        (artist, title) = (
            track.get_tag_raw('artist')[0].encode("utf-8"),
            track.get_tag_raw('title')[0].encode("utf-8"),
        )
    except TypeError:
        raise LyricsNotFoundException

    if not artist or not title:
        raise LyricsNotFoundException

    artist = urllib.quote(artist.replace(' ', '_'))
    title = urllib.quote(title.replace(' ', '_'))

    url = 'http://lyrics.wikia.com/wiki/%s:%s' % (artist, title)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        raise LyricsNotFoundException

    try:
        soup = BeautifulSoup.BeautifulSoup(html)
    except HTMLParser.HTMLParseError:
        raise LyricsNotFoundException

    lyrics = soup.findAll(attrs={"class": "lyricbox"})
    if not lyrics:
        raise LyricsNotFoundException

    # Turn <br /> into newlines, remove divs, collapse triple newlines,
    # then drop the last seven lines of what remains.
    rendered = lyrics[0].renderContents().replace('<br />', '\n')
    kept_lines = self.remove_div(rendered).replace(
        '\n\n\n', '').split('\n')[0:-7]
    lyrics = re.sub(
        r' Send.*?Ringtone to your Cell ', '', '\n'.join(kept_lines))

    lyrics = self.remove_script(lyrics)
    lyrics = self.remove_html_tags(
        unicode(
            BeautifulSoup.BeautifulStoneSoup(
                lyrics,
                convertEntities=BeautifulSoup.BeautifulStoneSoup.HTML_ENTITIES,
            )
        )
    )

    return (lyrics, self.name, url)
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyricsmania.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, or if
        fetching, parsing or extracting the lyrics fails
    """
    try:
        (artist, title) = (
            track.get_tag_raw('artist')[0].encode("utf-8"),
            track.get_tag_raw('title')[0].encode("utf-8"),
        )
    except TypeError:
        raise LyricsNotFoundException

    if not artist or not title:
        raise LyricsNotFoundException

    artist = artist.replace(' ', '_').replace('\'', '').lower()
    title = title.replace(' ', '_').replace('\'', '').lower()

    url = 'http://www.lyricsmania.com/%s_lyrics_%s.html' % (title, artist)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        raise LyricsNotFoundException

    try:
        lyrics_html = lxml.html.fromstring(html)
    except lxml.etree.XMLSyntaxError:
        raise LyricsNotFoundException

    try:
        lyrics_body = lyrics_html.find_class('lyrics-body')[0]
        lyrics_body.remove(lyrics_body.get_element_by_id('video-musictory'))
        # Raw string: '\s' in a normal literal is an invalid escape
        # sequence, which newer interpreters warn about.
        lyrics = re.sub(r'^\s+Lyrics to .+', '', lyrics_body.text_content())
        lyrics = lyrics.replace('\t', '')
        lyrics = self.remove_script(lyrics)
        lyrics = self.remove_html_tags(lyrics)
    except Exception:
        raise LyricsNotFoundException

    # We end up with unicode in some systems, str (bytes) in others;
    # no idea why and which one is correct.
    if isinstance(lyrics, bytes):
        lyrics = lyrics.decode('utf-8', errors='replace')

    return (lyrics, self.name, url)
def find_covers(self, track, limit=-1):
    """
    Searches last.fm for album covers

    Tries an 'album' query and then a 'track' query; returns a list with
    the first non-empty 'extralarge' image url of an element whose
    artist text equals the encoded artist tag, or an empty list.
    ``limit`` is not used.
    """
    # TODO: handle multi-valued fields better
    try:
        artist = track.get_tag_raw('artist')[0]
        album = track.get_tag_raw('album')[0]
        title = track.get_tag_raw('title')[0]
    except TypeError:
        return []

    if not (artist and album and title):
        return []

    for kind, value in (('album', album), ('track', title)):
        query_url = self.url.format(
            type=kind,
            value=quote_plus(value.encode("utf-8")),
            api_key=API_KEY,
        )

        try:
            data = common.get_url_contents(query_url, self.user_agent)
        except IOError:
            continue

        try:
            xml = ETree.fromstring(data)
        except SyntaxError:
            continue

        for element in xml.getiterator(kind):
            if element.find('artist').text != artist.encode("utf-8"):
                continue
            for sub_element in element.findall('image'):
                if sub_element.attrib['size'] != 'extralarge':
                    continue
                image_url = sub_element.text
                if image_url:
                    return [image_url]

    return []
def find_covers(self, track, limit=-1):
    """
    Searches last.fm for album covers

    :param track: object providing ``get_tag_raw`` for the 'artist',
        'album' and 'title' tags
    :param limit: not used by this method
    :returns: one-element list with an 'extralarge' image url, or an
        empty list when a tag is missing or empty or nothing matches
    """
    # TODO: handle multi-valued fields better
    try:
        tags = (
            track.get_tag_raw('artist')[0],
            track.get_tag_raw('album')[0],
            track.get_tag_raw('title')[0],
        )
    except TypeError:
        return []

    artist, album, title = tags
    if not artist or not album or not title:
        return []

    searches = (('album', album), ('track', title))
    for search_type, value in searches:
        encoded_value = quote_plus(value.encode("utf-8"))
        request_url = self.url.format(
            type=search_type, value=encoded_value, api_key=API_KEY
        )

        try:
            data = common.get_url_contents(request_url, self.user_agent)
        except IOError:
            continue

        try:
            xml = ETree.fromstring(data)
        except SyntaxError:
            continue

        for element in xml.getiterator(search_type):
            if element.find('artist').text == artist.encode("utf-8"):
                for sub_element in element.findall('image'):
                    if sub_element.attrib['size'] == 'extralarge':
                        found = sub_element.text
                        if found:
                            return [found]

    return []
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyrics.fandom.com.

    :param track: object providing ``get_tag_raw`` for the 'artist' and
        'title' tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, the
        page cannot be fetched or parsed, or it has no element of class
        'lyricbox'
    """
    try:
        artist = track.get_tag_raw('artist')[0].encode("utf-8")
        title = track.get_tag_raw('title')[0].encode("utf-8")
    except TypeError:
        raise LyricsNotFoundException

    if not (artist and title):
        raise LyricsNotFoundException

    artist = urllib.quote(artist.replace(' ', '_'))
    title = urllib.quote(title.replace(' ', '_'))
    url = 'https://lyrics.fandom.com/wiki/%s:%s' % (artist, title)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        raise LyricsNotFoundException

    try:
        soup = BeautifulSoup(html, "lxml")
    except HTMLParser.HTMLParseError:
        raise LyricsNotFoundException

    boxes = soup.findAll(attrs={"class": "lyricbox"})
    if not boxes:
        raise LyricsNotFoundException

    # Turn <br /> into newlines, remove divs, collapse triple newlines.
    with_div = boxes[0].renderContents().replace('<br />', '\n')
    lines = self.remove_div(with_div).replace('\n\n\n', '').split('\n')
    lyrics = re.sub(r' Send.*?Ringtone to your Cell ', '', '\n'.join(lines))

    lyrics = self.remove_script(lyrics)
    reparsed = BeautifulSoup(lyrics, "lxml")
    lyrics = self.remove_html_tags(unicode(reparsed))

    return (lyrics, self.name, url)
def find_lyrics(self, track):
    """
    Fetch the lyrics of ``track`` from lyricsmania.com.

    :param track: object providing ``get_tag_raw`` for the "artist" and
        "title" tags
    :returns: tuple of (lyrics, self.name, url)
    :raises LyricsNotFoundException: if a tag is missing or empty, or if
        fetching, parsing or extracting the lyrics fails
    """
    try:
        (artist, title) = (
            track.get_tag_raw("artist")[0].encode("utf-8"),
            track.get_tag_raw("title")[0].encode("utf-8"),
        )
    except TypeError:
        raise LyricsNotFoundException

    if not artist or not title:
        raise LyricsNotFoundException

    artist = artist.replace(" ", "_").replace("'", "").lower()
    title = title.replace(" ", "_").replace("'", "").lower()

    url = "http://www.lyricsmania.com/%s_lyrics_%s.html" % (title, artist)

    try:
        html = common.get_url_contents(url, self.user_agent)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        raise LyricsNotFoundException

    try:
        lyrics_html = lxml.html.fromstring(html)
    except lxml.etree.XMLSyntaxError:
        raise LyricsNotFoundException

    try:
        lyrics_body = lyrics_html.find_class("lyrics-body")[0]
        lyrics_body.remove(lyrics_body.get_element_by_id("video-musictory"))
        # Raw string: "\s" is not a valid escape in a normal literal.
        lyrics = re.sub(r"^\s+Lyrics to .+", "", lyrics_body.text_content())
    except Exception:
        raise LyricsNotFoundException

    # We end up with unicode in some systems, str (bytes) in others;
    # no idea why and which one is correct.
    if isinstance(lyrics, bytes):
        lyrics = lyrics.decode("utf-8", errors="replace")

    return (lyrics, self.name, url)
def get_cover_data(self, url):
    """
    Return what ``common.get_url_contents`` yields for ``url``, using
    the module-level ``USER_AGENT``.

    Exceptions raised by the fetch are not handled here.
    """
    data = common.get_url_contents(url, USER_AGENT)
    return data
def get_cover_data(self, url):
    """
    Return what ``common.get_url_contents`` yields for ``url``, using
    ``self.user_agent``.

    Exceptions raised by the fetch are not handled here.
    """
    data = common.get_url_contents(url, self.user_agent)
    return data
def get_json(url):
    """
    Fetch ``url`` with ``common.get_url_contents`` (using the
    module-level ``USER_AGENT``) and return the result decoded with
    ``json.loads``.

    Exceptions from the fetch or the decoding are not handled here.
    """
    raw = common.get_url_contents(url, USER_AGENT)
    return json.loads(raw)