def update_streams(self, cat):
    streams = []
    if cat not in self.catmap:
        return streams

    # collect pages
    self.status(0.0)
    html = ahttp.get(self.base_url + self.catmap[cat])
    max_pages = int(conf.max_streams) / 50.0   # or enable conf.housemixes_pages?
    for i in range(2, int(max_pages)):
        self.status(float(i) / max_pages)
        if html.find("latest/" + str(i)) >= 0:   # str.find() returns -1 on miss
            html += ahttp.get(self.base_url + self.catmap[cat] + "/latest/%s" % i)
    html = re.sub("</body>.+?<body>", "", html, 100, re.S)
    self.status("Extracting mixes…")

    # extract station rows from .card-audio blocks
    for card in [pq(e) for e in pq(html)(".card-audio")]:
        r = {
            "title": card(".card-audio-title span").text(),
            "playing": card(".card-audio-user a").text(),
            "genre": card(".card-tags a span").text(),
            # url will be substituted later
            "url": self.base_url + card(".audio-image-link").attr("href"),
            "homepage": self.base_url + card(".audio-image-link").attr("href"),
            # standard size 318x318 loads quicker
            "img": card(".audio-image-link img").attr("src"),  # re.sub("=318&", "=32&", ...)
            "listeners": int(card("a.card-plays").text()),
            # abuses the bitrate column for likes + downloads + favs
            "bitrate": sum(int(a) for a in card(".card-likes, .card-downloads, .card-favs").text().split()),
        }
        streams.append(r)
    #log.DATA(streams)
    return streams

def upload(self, e, form=0):
    if e:
        login = self.user_pw()
        if not login:
            return
        submit = {
            "user": login[0],           # api
            "passwort": login[1],       # api
            "url": e["url"],
            "bemerkung": e["title"],
            "genre": e["genre"],
            "typ": e["format"][6:],
            "eintragen": "eintragen",   # form
        }
        # just push data in, like the form does
        if form:
            self.login()
            ahttp.get(self.api + "c_neu.jsp", params=submit, ajax=1, post=1)
        # use JSON interface
        else:
            ahttp.get(self.api + "commonadd.json", params=submit, ajax=1)

def update_streams(self, cat):
    # result list
    entries = []
    # common
    if cat == "common":
        # fetch
        data = ahttp.get(self.api + "common.json", encoding="utf-8")
        entries = json.loads(data)
    # bookmarks
    elif cat == "personal" and self.user_pw():
        login = self.user_pw()
        # username presumably goes into the query string (the value was masked in the source)
        data = ahttp.get(self.api + "favoriten.json?user=" + login[0], encoding="utf-8")
        entries = json.loads(data)
    # unknown
    else:
        self.status("Unknown category")

    # augment result list
    for i, e in enumerate(entries):
        entries[i]["homepage"] = self.api + "c_common_details.jsp?url=" + e["url"]
        entries[i]["genre"] = cat
        entries[i]["format"] = "audio/mpeg"
    # send back
    return entries

def init2(self, *p):
    lap = conf.netrc(varhosts=("reciva.com", "radios.reciva.com"))
    if lap:
        log.LOGIN("Reciva account:", lap)
        ahttp.get(
            "https://radios.reciva.com/account/login",
            {"username": lap[0] or lap[1], "password": lap[2]},
            timeout=2
        )

def update_categories(self):
    self.categories = []
    html = ahttp.get(self.base)
    main_cats = re.findall(r'<a href="/radios/main-genre/(\w+)">', html)
    for cat in main_cats:
        self.progress(main_cats)
        html = ahttp.get(self.base + "/main-genre/" + cat)
        sub = re.findall(r'<a href="/radios/genre/(\w+)">', html)
        self.categories.append(cat)
        self.categories.append(sub)
    self.progress(0)
    return self.categories

def update_streams(self, cat, search=None):
    # Assemble HTML (collect pages 1..8 into a single blob prior to extraction)
    html = ""
    page = 1
    while page < 9:
        page_sfx = "/%s" % page if page > 1 else ""
        if cat:
            add = ahttp.get(self.base + self.catmap[cat] + page_sfx)
        elif search:
            add = ahttp.get(self.base + "stations" + page_sfx, {
                "text": search, "country_id": "", "genre_id": ""
            })
        else:
            break  # neither category nor search term given
        html += add
        if re.search(r'/\d+">Next</a>', add):
            page += 1
        else:
            break

    # Extract all the things
    #
    # · entries utilize HTML5 microdata classification
    # · title and genre available right away
    # · img url is embedded
    # · keep station ID as `urn:liveradio:12345`
    #
    r = []
    ls = re.findall(r"""
        itemtype="http://schema.org/RadioStation"> .*?
        href="/stations/([\w-]+) .*?
        <img\s+src="/(files/images/[^"]+)" .*?
        ="country">([^<]+)< .*?
        itemprop="name"><a[^>]+>([^<]+)</a> .*?
        class="genre">([^<]+)<
    """, html, re.X | re.S)
    for row in ls:
        log.DATA(row)
        id, img, country, title, genre = row
        r.append(dict(
            homepage = self.base + "stations/" + id,
            url = "urn:liveradio:" + id,
            playing = unhtml(country),
            title = unhtml(title),
            genre = unhtml(genre),
            img = self.base + img,
            img_resize = 32
        ))
    return r

def install(self, p):
    src = ahttp.get(p["$file"], encoding="utf-8")
    name = p["$name"]
    with open("{}/{}.py".format(conf.plugin_dir, name), "w") as f:
        f.write(src)
    self.parent.status("Plugin '{}.py' installed.".format(name))
    conf.add_plugin_defaults(plugin_meta(module=name), name)

def update_categories(self): html = ahttp.get("http://www.liveradio.ie/genres") self.categories = ["Top 20"] for row in re.findall( r"""<a href="/(stations/genre-[\w-]+)">([^<]+)</a>""", html): self.categories.append(unhtml(row[1])) self.catmap[unhtml(row[1])] = unhtml(row[0])
def update_categories(self):
    html = ahttp.get(self.base_url)
    #log.DATA(html)
    self.categories = []
    # Genre list in sidebar
    """ <li><a id="genre-90" href="/Genre?name=Adult" onclick="loadStationsByGenre('Adult', 90, 89); return false;">Adult</a></li> """
    rx = re.compile(r"loadStationsByGenre\( '([^']+)' [,\s]* (\d+) [,\s]* (\d+) \)", re.X)
    subs = rx.findall(html)
    # group main genres and their sub-genre lists
    current = []
    for (title, id, main) in subs:
        self.catmap[title] = int(id)
        if not int(main):
            self.categories.append(title)
            current = []
            self.categories.append(current)
        else:
            current.append(title)
    # .categories/.catmap get saved by reload_categories()

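# Illustration of the structures built above (values hypothetical): each main
# genre (third loadStationsByGenre argument == 0) is followed by the list of
# its sub-genres, while catmap records every title's numeric id:
#
#   self.categories = ["Pop", ["Adult", "Oldies"], "Rock", ["Metal", "Punk"]]
#   self.catmap     = {"Pop": 89, "Adult": 90, "Oldies": 92, "Rock": 95, ...}
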
def update_streams(self, cat):
    url = "http://modarchive.org/index.php"
    params = dict(query=self.catmap[cat], request="search", search_type="genre")
    html = ahttp.get(url, params)
    entries = []
    rx_mod = re.compile(r"""
        href="(https?://api\.modarchive\.org/downloads\.php[?]moduleid=(\d+)[#][^"]+)" .*?
        ="format-icon">(\w+)< .*?
        title="([^">]+)">([^<>]+)</a> .*?
        >(?:Rated|Unrated)</a>\s*(\d*)
    """, re.X | re.S)
    for uu in rx_mod.findall(html):
        (url, id, fmt, title, file, rating) = uu
        #log.DATA(uu)
        entries.append({
            "genre": cat,
            "url": url,
            "id": id,
            "format": "audio/mod+zip",
            "title": title,
            "playing": file,
            "listeners": int(rating if rating else 0),
            "homepage": "http://modarchive.org/index.php?request=view_by_moduleid&query=" + id,
        })
    # done
    return entries

def update_categories(self): html = ahttp.get("http://modarchive.org/index.php?request=view_genres") rx_current = re.compile(r""" >\s+(\w[^<>]+)\s+</h1> | <a\s[^>]+query=(\d+)&[^>]+>(\w[^<]+)</a> """, re.S|re.X) #-- archived shows sub = [] self.categories = [] for uu in rx_current.findall(html): (main, id, subname) = uu if main: self.categories.append(main) self.catmap[main] = 0 sub = [] self.categories.append(sub) else: sub.append(subname) self.catmap[subname] = id # .categories and .catmap are saved by reload_categories() pass
def update_streams(self, cat, search=None):
    r = []
    # split into table rows
    html = ahttp.get(self.base.format(self.catmap[cat]), encoding="utf-8")
    for tr in html.split("<tr>"):
        # extract fields
        ls = re.findall(r"""
            <a\s+href="([^<">]+)">      # homepage
            <b>([^<>]+)</b>             # title
            .*? <td>([^<>]+)</td>       # location
            .+  alt="(\w+)"             # format
            .+  <a\s+href="([^<">]+)">  # url
            (\d+)                       # bitrate
        """, tr, re.X | re.S)
        # assemble into rows
        if len(ls) and len(ls[0]) == 6:
            homepage, title, location, format, url, bitrate = ls[0]
            genre = re.findall(r"<td>([^<>]+)</td>\s</tr>", tr)
            r.append(dict(
                homepage = homepage,
                playing = location,
                title = unhtml(title),
                url = url,
                genre = genre[0] if genre else cat,
                bitrate = int(bitrate),
                format = mime_fmt(format),
            ))
    return r

def update_streams(self, cat, search=None):
    html = ahttp.get("http://www.publicradiofan.com/cgibin/statsearch.pl?format={}&lang=".format(cat))
    html = re.split("<H2>", html, 2, re.S)[1]
    probe = action.extract_playlist()
    r = []
    for block in re.split("<TR VALIGN=TOP>", html, 0):
        m = re.search(r"""
            <A .*? HREF=" (?P<url> .+?) "> <B> (?P<title> .*?) </B> .*?
            stt> (?P<descr> .*?) [<&] .*?
            stt> (?P<genre> .*?) [<&] .*?
            <I> .*? HREF="(?P<homepage> .*?)"
        """, block, re.X | re.S)
        if m:
            r.append(dict(
                genre = m.group("genre"),
                url = m.group("url"),
                title = m.group("title"),
                playing = m.group("descr"),
                homepage = m.group("homepage"),
                listformat = probe.probe_ext(m.group("url")) or "srv",
            ))
    return r

def update_streams(self, cat):
    # prep block regex
    rx_genre = re.compile(r"""
        <tr\sid='(\d+)' .*?
        Station\sName:<.*?>([^<]+)</(?:span|font|td|a)> .*?
        ^var\slastsong_\d+\s*=\s*'([^\n]+)'; .*?
        <a[^>]+onClick=[^>]+&stream_id=(\d+)'[^>]+>(\d+)k<
    """, re.I | re.S | re.X | re.M)

    # collect pages into single string
    html = ""
    for page in range(0, self.conf_maxpages):
        self.progress(self.conf_maxpages, page)
        html += ahttp.get("%s?start=%s&g=%s&e=%s&s=" % (self.base_url, page * 10, self.catmap[cat], self.conf_family_unsafe))
        if not re.search(r"\?start=%s.*>Next" % ((page + 1) * 10), html):
            break
    html = re.sub(">Featured Stations.+?>Previous Page", "", html, 100, re.S)

    # extract and convert to station rows
    entries = []
    for uu in re.findall(rx_genre, html):
        log.DATA(uu)
        entries.append(dict(
            genre = cat,
            id = to_int(uu[0]),
            sid = to_int(uu[3]),
            title = unhtml(uu[1]),
            playing = unhtml(uu[2]),   # actually JS decoding...
            format = "audio/mpeg",
            bitrate = to_int(uu[4]),
            url = self.pls_sffx % (self.base_url, uu[0], uu[3])
        ))
    return entries

def google_find_homepage(row):
    """ Searches for missing homepage URL via Google. """
    if row.get("url") not in tried_urls:
        tried_urls.append(row.get("url"))
        if row.get("title"):
            rx_t = re.compile('^(([^-:]+.?){1,2})')
            rx_u = re.compile(r'''
                (?: <h3\s+class="r"><a\s+href=" | /url\?q= )
                (https?://
                 (?!www\.google|webcache|google|tunein|streema)
                 [^"&]+)
            ''', re.X)

            # Use literal station title now
            title = row["title"]
            #title = title.group(0).replace(" ", "%20")

            # Do 'le google search
            html = ahttp.get("http://www.google.com/search",
                             params=dict(hl="en", q=title, client="streamtuner2"),
                             ajax=1, timeout=3.5)
            #log.DATA(re.sub("<(script|style)[^>]*>.*?</(script|style)>", "", html, 100, re.S))

            # Find first URL hit
            url = rx_u.findall(html)
            if url:
                #log.DATA(url)
                row["homepage"] = ahttp.fix_url(url[0])
                return True

def update_streams(self, cat):
    rx_link = re.compile(r"""
        <a\shref="(http://punkcast.com/(\d+)/index.html)">
        .*? ALT="([^<">]+)"
    """, re.S | re.X)
    entries = []
    #-- all from frontpage
    html = ahttp.get("http://www.punkcast.com/")
    for uu in rx_link.findall(html):
        (homepage, id, title) = uu
        entries.append({
            "genre": "%s" % id,
            "title": title,
            "playing": "PUNKCAST #%s" % id,
            "format": "audio/mpeg",
            "url": "none:",
            "homepage": homepage,
            "img": "http://punkcast.com/%s/PUNK%s.jpg" % (id, id) if conf.punkcast_img else None,
        })
    # done
    return entries

def update_streams(self, cat):
    rx_title = re.compile(r'<a\s+href="([^">]+)"[^>]+target="_blank"[^>]*>(.+?)</a>', re.I)
    rx_urls = re.compile(r'<a href="([^">]+)">(\d+)(?: Kbps)*</a>', re.I)
    rx_genre = re.compile(r'<td[^>]+>(\w*[^<>]*)</td>\s*<td[^>]+>(\w+[^<>]+)</td>\s*$', re.I)
    entries = []
    html = ahttp.get("http://radiolist.net/" + self.catmap[cat])
    for block in re.findall("<tr>(.+?)</tr>", html, re.S):
        ut = re.findall(rx_title, block)   # homepage+title
        uu = re.findall(rx_urls, block)    # urls+bitrates
        lg = re.findall(rx_genre, block)   # location+genre
        #print(ut, uu, lg)
        if ut and uu and lg:
            url, br = self.best_url(uu)
            entries.append(dict(
                homepage = ut[0][0],
                title = unhtml(ut[0][1]),
                url = url,
                bitrate = br,
                format = self.mime_guess(url, "audio/mpeg"),
                listformat = self.list_guess(url),
                playing = lg[0][0],
                genre = lg[0][1]
            ))
    # done
    for e in entries:
        log.DATA(e)
    return entries

def update_categories(self):
    html = ahttp.get(self.base_url)
    rx = re.compile(r"""="/stations/[-+&.\w\s%]+/">([^<]+)<""")
    cats = rx.findall(html)
    cats = [s.capitalize() for s in cats]
    self.categories = sorted(set(cats))

def api(self, method, params={}, post=False):
    j = ahttp.get(self.base + method, params, post=post)
    try:
        # some entries contain invalid character encodings
        return json.loads(j, strict=False)
    except:
        return []

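# Usage sketch for the wrapper above (method name and parameters are made up):
# any JSON endpoint under self.base can be queried, with malformed responses
# degrading to an empty list instead of raising:
#
#   stations = self.api("stations/search", {"name": "jazz"})
#   for st in stations:
#       log.DATA(st)
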
def resolve_urn(self, row): if row.get("url").startswith("urn:delicast"): html = ahttp.get(row["homepage"]) ls = re.findall("^var url = \"(.+)\";", html, re.M) if ls: row["url"] = unhtml(ls[0]) else: log.ERR("No stream found on %s" % row["homepage"]) return row
def update_streams(self, cat, search=None):
    entries = []
    if cat:
        html = ahttp.get(self.base_url % (self.catmap[cat.lower()], conf.max_streams))
    else:  # search
        html = ahttp.get(
            "https://radios.reciva.com/stations/search?q=%s&categories=&codec=&min_bitrate=&max_bitrate=&working=true&count=%s"
            % (search, conf.max_streams)
        )
    if not html:
        log.ERR(
            "No results from the radios.reciva.com server. Their category browsing sometimes breaks. "
            "We're not using the search function, as that would strain their server too much. "
            "You might try adding login credentials to `.netrc`, albeit that rarely helps.",
            html
        )
        return []

    # extract
    for row in (pq(row) for row in pq(html).find("#mytable").find(".oddrow, .evenrow")):
        u = row.find(".streamlink")
        if u:
            id = re.findall(r"(\d+)", u.attr("href"))[0]
            entries.append({
                "title": row.find(".stationName").text(),
                "id": id,
                "url": "urn:reciva:%s" % id,
                "homepage": "https://radios.reciva.com/station/%s" % id,
                "playing": row.find(".stationLocation").text(),
                "genre": row.find(".stationGenre").text(),
                "format": mime_fmt(row.find(".streamCodec").text()[0:3]),
                "bitrate": int(re.findall(r"\d+", row(".streamCodec").text()[4:] + " 0")[0]),
            })
    # done
    return entries

def row(self):
    r = ChannelPlugin.row(self)
    url = r.get("url")
    if url and url.find("/profile/") > 0:
        html = ahttp.get(url)
        ls = re.findall(r""" Mp3Url [\\:"]+ (http[^\\"]+) """, html, re.M | re.X)
        if ls:
            log.URL(ls[0])
            r["url"] = ls[0]
    return r

def play(self, row):
    audio = "audio/mp3"
    r = []
    for e in pq(ahttp.get(row["homepage"])).find("enclosure"):
        r.append(e.get("url"))
        audio = e.get("type")
    if r:
        action.action.play(r[0], audioformat=audio, listformat="url/direct")

def from_web(self, cat):
    ucat = re.sub(r"\W+", "-", cat.lower().replace("'", ""))
    html = ahttp.get("http://filtermusic.net/{}".format(ucat))
    ls = re.findall(r"""<h4>(.*?)</h4><p>(.*?)</p>.*?href='(.*?)'""", html)
    r = [
        dict(genre=cat, title=title, playing=descr, url=url)
        for title, descr, url in ls
    ]
    return r

def resolve_urn(self, row): if row.get("url", "-").find("urn:streema:") != 0: return id = row["url"][12:] html = ahttp.get("http://streema.com/radios/play/%s" % id) url = re.findall('<ul class="stream-downloads">.+?<a href="(.+?)"', html, re.S) if not url: return row["url"] = url[0]
def banner_localcopy(url, fn, resize=None):
    # Check URL and target filename
    if not re.match(r"^https?://[\w.-]{10}", url):
        return False
    # Fetch and save
    imgdata = ahttp.get(url, binary=1, verify=False)
    if imgdata:
        return store_image(imgdata, fn, resize)

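# Usage sketch (file names hypothetical): mirror a station logo into the local
# cache, downscaled for display; malformed or too-short URLs are rejected up
# front:
#
#   banner_localcopy("http://example.org/logos/station.png", "/tmp/st2/station.png", resize=32)
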
def fetch_ranked_xspf(self):
    xspf = ahttp.get("http://magnatune.com/genres/m3u/ranked_all.xspf")
    xspf = re.sub(r"/([^</>]+)\.mp3</location>", self.urlenc, xspf)
    cnv = action.extract_playlist(text=xspf)
    rows = [
        dict(title=unhtml(r["playing"]), url=r["url"], homepage=r["homepage"], genre="album")
        for r in cnv.rows("xspf")
    ]
    return rows

def update_streams(self, cat, search=None):
    ls = json.loads(ahttp.get("http://listen.{}/public1".format(cat)))
    rows = [
        dict(
            genre=row["key"],
            title=row["name"],
            url=row["playlist"],
            id=row["key"],
            homepage="http://www.{}/{}".format(cat, row["key"]),
            bitrate=64,
        )
        for row in ls
    ]
    return rows

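# The public1 endpoint apparently returns a JSON list of channel descriptors,
# of which only three fields are consumed here. A hypothetical sample entry:
#
#   {"key": "vocaltrance", "name": "Vocal Trance",
#    "playlist": "http://listen.example.fm/public1/vocaltrance.pls"}
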
def update_streams(self, cat, search=None, max_pages=10):
    r = []
    # fetch
    html = ""
    if search:  # pretty much identical (except first page should be /dir/?action=search and POST field)
        cat = search
        max_pages = 1
    for i in range(1, max_pages + 1):
        html += ahttp.get("%s/%s/page%s" % (self.base, cat, i))
        if not re.search('href="/dir/%s/page%s">Next' % (cat, i + 1), html):
            break
    # extract
    ls = re.findall(r"""
        <tr> .*?
        <audio\s+id="jp_audio_(\d+)" .*? src="([^"]+?);?"> .*?
        <h4.*?>([^<>]+)</a></h4> .*?
        <b>([^<>]*)</b> .*?
        <b\s+class[^>]+>([^<>]*)</b> .*?
        Genre:(.+?)</td> .*?
        </i>\s*(\d+)\s*Kbps<br>\s*(audio/[\w.-]+)
    """, html, re.X | re.S)
    #log.DATA(re.findall("(<audio.*?>)", html))  # blocks
    for row in ls:
        try:
            log.UI(row)
            r.append(dict(
                id = row[0],
                url = row[1],
                title = unhtml(row[2]),
                playing = unhtml(row[3]),
                homepage = unhtml(row[4]),
                genre = unhtml(row[5]),
                bitrate = to_int(row[6]),
                format = row[7],
                #listeners = to_int(listeners[5])
            ))
        except:
            pass  # some field missing
    # done
    return r
    # (disabled): collect genres
    #for row in r:
    #    for c in re.findall(r"(\w+)", row["genre"]):
    #        if c not in self.categories:
    #            self.categories.append(c)

def update_streams(self, cat, search=None):
    # fetch page
    wiki = ahttp.get(self.base[cat], verify=False)
    f = "audio/mpeg" if cat == "stations" else "video/mp4"
    # split on headlines
    r = []
    for src in re.split("^==+", wiki, 0, re.M):
        r += self.join(src, f)
    return r