def google_find_homepage(row):
    """
    Search Google for a station's missing homepage URL.

    Mutates `row` in place: sets row["homepage"] on a hit and returns
    True; returns None otherwise. Records row["url"] in the module-level
    `tried_urls` list so the same station isn't queried repeatedly.
    """
    # remember which stream urls we already looked up
    if row.get("url") not in tried_urls:
        tried_urls.append(row.get("url"))
    if row.get("title"):
        # extract result links from Google's HTML, skipping google/cache/aggregator hosts
        # (removed: unused `rx_t` title-trimming regex and dead commented-out code)
        rx_u = re.compile(
            r''' (?: <h3\s+class="r"><a\s+href=" | /url\?q= )
                 (https?:// (?!www\.google|webcache|google|tunein|streema) [^"&]+)''',
            re.X)
        # Use literal station title as the search term
        title = row["title"]
        # Do 'le google search (ajax=1 → plain html result)
        html = ahttp.get(
            "http://www.google.com/search",
            params=dict(hl="en", q=title, client="streamtuner2"),
            ajax=1, timeout=3.5)
        # Find first URL hit
        url = rx_u.findall(html)
        if url:
            row["homepage"] = ahttp.fix_url(url[0])
            return True
def unpack(self, r):
    """
    Convert an API station record `r` into a streamtuner2 row dict.

    Picks the "best" of the alternative streams — weighted by the
    content-type preference from self.format_q times the bitrate —
    and sums listener counts across all alternatives. Returns {} when
    the record carries no streams at all.
    """
    listeners = 0
    # find stream
    if not r.get("streams"):
        return {}
    # compare against first entry
    s = r["streams"][0]
    # select "best" stream if there are alternatives
    for alt in r["streams"]:
        listeners += alt.get("listeners", 0)
        # set defaults
        if not alt.get("content_type"):
            alt["content_type"] = "?"
        if not alt.get("bitrate"):
            alt["bitrate"] = 16
        # There's a "\r\n" in nearly every entry :?
        alt["content_type"] = alt["content_type"].strip()
        # weight format with bitrate
        # BUGFIX: fallback weight must be the float 0.9, not the string "0.9" —
        # "0.9" * bitrate is string repetition, which broke the quality compare
        cur_q = self.format_q.get(s["content_type"], 0.9) * s.get("bitrate", 32)
        alt_q = self.format_q.get(alt["content_type"], 0.9) * alt.get("bitrate", 32)
        # swap out for overall better score
        if alt_q > cur_q:
            s = alt
    # fix absent audio type
    if not s.get("content_type") or len(s["content_type"]) < 7:
        s["content_type"] = "audio/mpeg"
    # rename fields
    return dict(
        genre=" ".join(c["slug"] for c in r["categories"]),
        title=r["name"],
        playing="{} {}".format(r.get("country"), r.get("description", "")),
        homepage=ahttp.fix_url(r["website"]),
        url=s["stream"],
        format=s["content_type"],
        bitrate=s["bitrate"],
        listeners=listeners,
        img=r.get("image", {}).get("thumb", {}).get("url", ""),  # CDN HTTPS trip up requests.get
        img_resize=32,
        state=self.state_map.get(int(s["status"]), ""),
        deleted=s.get("timedout", False),
    )
def postprocess_filter_homepage(self, row, channel):
    """
    Row filter: derive a missing homepage from a www.… token in the title.

    When a station has no homepage yet, try to spot one in its title
    text (lowercased, spaces removed, "www." prefix enforced) and fix
    it up via ahttp. Always returns True so no row is dropped.
    """
    if not row.get("homepage"):
        match = self.rx_www_url.search(row.get("title", ""))
        if match:
            guess = match.group(0).lower().replace(" ", "")
            if guess.find("www.") != 0:
                guess = "www." + guess
            row["homepage"] = ahttp.fix_url(guess)
    return True
def with_dom(self, html_list):
    """
    Parse internet-radio station list pages via DOM (pyquery).

    Each station sits in its own <tr>; extracts listeners/bitrate from
    the <p> text, the stream url from an onclick= javascript handler,
    plus title, homepage and genre links. Returns a list of row dicts.
    """
    log.PROC("internet-radio, dom")
    rx_numbers = re.compile(r"(\d+)")  # raw string: was "(\d+)" with an invalid-escape warning
    r = []
    for html in html_list:
        # the streams are arranged in table rows
        doc = pq(html)
        # `row_el` was named `dir`, shadowing the builtin dir()
        for row_el in (pq(e) for e in doc("tr")):
            # bitrate/listeners — pad with " 0 0" so both numbers always exist
            bl = row_el.find("p")
            if bl:
                bl = rx_numbers.findall(str(bl.text()) + " 0 0")
            else:
                bl = [0, 0]
            # stream url hides in an onclick= handler
            url = row_el.find("i").eq(0).parent().attr("onclick")
            if url:
                m = re.search(r"(http://[^\'\"\>]+)", url)
                url = m.group(0) if m else ""
            else:
                url = ""
            row = {
                "title": row_el.find("h4").text(),
                "homepage": ahttp.fix_url(row_el.find("a.small").attr("href") or ""),
                "url": url,
                "genre": row_el.find("a[href^='/stations/']").text() or "",
                "listeners": int(bl[0]),
                "bitrate": int(bl[1]),
                "format": "audio/mpeg",
                "playing": row_el.find("b").text(),
            }
            r.append(row)
    return r
def share(self, *w):
    """
    Share the currently selected station on MyOggRadio.org.

    Skips the upload when the station's title or url is already present
    in the cached "common" station list (requires that channel to have
    been loaded first). Returns True to mark the Gtk event handled.
    """
    # get data
    row = self.parent.row()
    if row:
        # copy so playlist-morphing below doesn't alter the displayed row
        row = copy.copy(row)
        # convert PLS/M3U link to direct ICY stream url
        if conf.myoggradio_morph and self.parent.channel().listformat != "url/direct":
            urls = action.convert_playlist(row["url"], row.get("listformat", "any"), "srv", local_file=False, row=row)
            # fall back to the original url if conversion produced nothing
            if not urls:
                urls = [row["url"]]
            row["url"] = ahttp.fix_url(urls[0])
        # prevent double check-ins
        if not self.streams.get("common"):
            log.WARN("Cache empty. Cannot compare stream info for newness. Please reload MyOggRadio channel first.")
            return
        # already known by title?
        if row["title"] in (r.get("title") for r in self.streams["common"]):
            pass
        # already known by url?
        elif row["url"] in (r.get("url") for r in self.streams["common"]):
            pass
        # send
        else:
            self.status("Sharing station URL...")
            if (self.upload(row)):
                # artificial slowdown, else user will assume it didn't work
                self.status(0.5)
                time.sleep(0.1)
                # tell Gtk we've handled the situation
                self.status("Shared '" + row["title"][:30] + "' on MyOggRadio.org", icon="gtk-save")
            else:
                # upload failed — just clear the status bar
                self.status()
    return True
def update_streams(self, cat, search=None):
    """
    Fetch the station list for one category from the directory page.

    Builds the category url from self.base plus the configured culture,
    then scrapes the Listen('<id>', '<title>', '<homepage>', …) onclick
    handlers. Stream urls are derived from self._url per station id.
    Returns a list of row dicts. (`search` is accepted but unused.)
    """
    ucat = re.sub(r"\W+", "", cat.lower())
    html = ahttp.get(self.base.format(ucat, conf.windowsmedia_culture))
    # onclick="Listen('31e11281-cf43-4d39-9164-77721604380b', 'DJ Perry Radio', 'http://www.djperryradio.com/', 'More Stations', '20', true);">
    r = []
    ls = re.findall(r""" onclick="Listen\('([\w\-]+)',\s*'(.+?)',\s*'(.+?)', """, html, re.X|re.S)
    for id, title, homepage in ls:
        r.append(dict(
            id = id,
            title = unhtml(title),
            homepage = ahttp.fix_url(homepage),
            url = self._url.format(id, conf.windowsmedia_culture),
            bitrate = 0,
        ))
    # BUGFIX: removed leftover debug `print r` (spammed stdout and is a
    # syntax error under Python 3)
    return r
def with_regex(self, html):
    """
    Parse internet-radio station list pages with regular expressions.

    Joins the fetched pages, chops them into <tr> chunks, then pulls
    stream url, title, now-playing text, homepage, genres, listener
    count and bitrate out of each chunk. Returns a list of row dicts.
    """
    log.PROC("internet-radio, regex")
    # Break up into <tr> blocks before extracting bits
    rx_tr = re.compile("""<tr[^>]*>(.+?)</tr>""", re.S)
    rx_data = re.compile(r"""
         playjp',\s*'(https?://[^'">]+)
         .*? <h4.*?>([^<>]+)</
         .*? <b>([^<>]*)</b>
         (?: .*? href="(.*?)" )?
         (?: .*? Genres:((?:</?a[^>]+>|\w+|\s+)+) )?
         .*? (\d+)\s*Listeners
         .*? (\d+)\s*Kbps
    """, re.S | re.X)
    rows = []
    for chunk in rx_tr.findall("\n".join(html)):
        # the paginator sits in a <tr> too — skip it
        if chunk.find('id="pagination"') >= 0:
            continue
        found = rx_data.search(chunk)
        if not found:
            log.DATA("Regex couldn't decipher entry:", chunk)
            continue
        (url, title, playing, homepage, genres, listeners, bitrate) = found.groups()
        # transform data
        rows.append({
            "url": url,
            "genre": strip_tags(genres or ""),
            "homepage": ahttp.fix_url(homepage or ""),
            "title": nl(title or ""),
            "playing": nl(playing or ""),
            "bitrate": int(bitrate or 0),
            "listeners": int(listeners or 0),
            "format": "audio/mpeg",  # there is no stream info on that, but internet-radio.org.uk doesn't seem very ogg-friendly anyway, so we assume the default here
        })
    return rows
def m3u(self, rows):
    """
    Render station rows as an extended M3U playlist string.

    Emits the #EXTM3U header, then one #EXTINF title line plus the
    fixed-up stream url per row, each terminated by a newline.
    """
    lines = ["#EXTM3U"]
    for row in rows:
        lines.append("#EXTINF:-1,%s" % row["title"])
        lines.append(ahttp.fix_url(row["url"]))
    return "\n".join(lines) + "\n"