def updateSongs(self, *args, **kargs):
    """Query douban.fm, parse the result, and replace the song list.

    Takes the same arguments as _open(). Retries indefinitely on
    UnicodeDecodeError (the server occasionally returns bytes that are
    not valid UTF-8 in the JSON payload); any other exception is logged
    with full context and re-raised.

    Fixes over the previous version: the response is now closed exactly
    once (in ``finally`` — it used to be closed both in the ``try`` body
    and again in ``finally``), and ``data`` is pre-initialized so the
    exception handlers cannot hit an unbound local when
    ``response.read()`` itself raises.
    """
    while True:
        response = None
        data = None
        try:
            response = self._open(*args, **kargs)
            data = response.read()
            j = json.loads(data)
            # Build one Song object per entry of the "song" array.
            songs = map(self._buildSong, j["song"])
            self.songs = songs
            return
        except UnicodeDecodeError:
            # Occasionally the JSON contains non-UTF-8 bytes; log the
            # details at debug level and retry the request.
            logger.debug(u"解析歌曲列表 JSON 异常 url = %s", util.decode(response.geturl()))
            logger.debug(response.headers)
            logger.debug(data)
            continue
        except Exception:
            logger.exception(u"解析歌曲列表异常 url = %s", util.decode(response.geturl()))
            logger.error(response.headers)
            logger.error(data)
            raise
        finally:
            # Single point of cleanup for every exit path.
            if response:
                response.close()
def update(self):
    """Scrape soundofhope.org for new items under the configured labels.

    Navigates the site menu to the first-level label (``label_l1``),
    then the second-level label (``label_l2``), then visits each listed
    item page to extract its publication date and mp3 URL. Collection
    stops at ``self.last_id`` (first already-seen item) or, on an
    initial run (``last_id`` unset), after ``self.init_count`` items.

    Returns:
        list of rss.Song, oldest first (the scrape order is reversed).

    Fix: the second ``logger.warning`` used to receive ONE tuple for
    TWO ``%s`` placeholders, which raises a string-formatting error at
    exactly the moment the warning should be emitted; the labels are
    now passed as separate lazy arguments.
    """
    songs = []
    home = 'http://soundofhope.org'
    label_l1 = util.decode(self.source.conf.get('label_l1'))
    label_l2 = util.decode(self.source.conf.get('label_l2'))
    response = self.opener.open(home, timeout=config.getint('timeout', 30))
    html = etree.parse(response, etree.HTMLParser())
    # Find the first-level nav entry whose link text contains label_l1.
    for l1 in html.findall('//div[@id="nav-site"]/ul/li[a]'):
        a = l1.find('a')
        if a.text.find(label_l1) != -1:
            break
    else:
        logger.warning(u'没有找到一级标签 label_l1 = %s', label_l1)
        return songs
    # Find the second-level entry under it containing label_l2.
    for l2 in l1.findall('div/ul/li/a'):
        if l2.text.find(label_l2) != -1:
            break
    else:
        # Pass the two labels as separate args — a single tuple here
        # breaks %-style formatting with two placeholders.
        logger.warning(u'没有找到二级标签 label_l1 = %s label_l2 = %s',
                       label_l1, label_l2)
        return songs
    items = urlparse.urljoin(home, l2.get('href'))
    response = self.opener.open(items, timeout=config.getint('timeout', 30))
    html = etree.parse(response, etree.HTMLParser())
    for item in html.findall('//div[@id="CategoryCol_mid"]/div/ul/li/a'):
        url = urlparse.urljoin(items, item.get('href')).strip()
        # Stop as soon as we reach the newest item of the previous run.
        if self.last_id and self.last_id == url:
            break
        song = rss.Song()
        song.id = url
        song.title = item.text.strip()
        # Fetch the item page for its publication date and mp3 link.
        response = self.opener.open(url, timeout=config.getint('timeout', 30))
        html = etree.parse(response, etree.HTMLParser())
        div = html.find('//div[@id="columnfirstnei2"]')
        pubDate = div.find('div[@class="subtitle"]/span[@class="date"]')
        song.pubDate = pubDate.text
        mp3 = div.find('.//div[@class="mp3_links"]/a')
        song.url = urlparse.urljoin(url, mp3.get('href'))
        songs.append(song)
        # First run: cap the number of items fetched.
        if not self.last_id and len(songs) >= self.init_count:
            break
    songs.reverse()
    return songs
def _open(self, type="n", sid=None, channel=0, pt=None):
    """Build the request URL from the given parameters and open it.

    Args:
        type: request type code sent as ``type`` (omitted when falsy).
        sid: song id, sent as ``sid`` when set.
        channel: channel number; sent unless explicitly None.
        pt: playback position in seconds; formatted to one decimal
            place when not None.

    Returns:
        The open response object from ``self.opener``.

    Fix: ``channel``/``pt`` are now compared with ``is not None``
    instead of ``!= None`` (PEP 8; also avoids surprises with objects
    overriding ``__eq__``). Behavior is otherwise unchanged — note that
    ``channel=0`` is still sent, since only None suppresses it.
    """
    params = {}
    if type:
        params["type"] = type
    if sid:
        params["sid"] = sid
    if channel is not None:
        params["channel"] = channel
    if pt is not None:
        params["pt"] = "%.1f" % pt
    params["from"] = "mainsite"
    # Cache-busting random token.
    params["r"] = self.random()
    url = self.url
    if params:
        url = "".join([url, "?", urllib.urlencode(params)])
    logger.info(u"请求URL %s", util.decode(url))
    response = self.opener.open(url, timeout=config.getint("timeout", 30))
    return response
def __init__(self, conf):
    """Initialize the source from its configuration section.

    Restores the id of the current song from the cache directory,
    loads/trims/persists the song cache, reads the optional settings
    (each falling back to a default when absent from the config), and
    optionally performs an immediate update on startup.
    """
    self.conf = conf
    self.name = conf.getName()
    self.last_id = None
    self.cur_id = None
    self.song = None
    self.songs = OrderedDict()
    # Make sure the cache directory exists, then restore the id of the
    # song that was current when we last ran (if recorded).
    self.cachedir = conf.getCacheDir()
    util.initDir(self.cachedir)
    self.cur_file = os.path.join(self.cachedir, 'cur')
    if os.path.exists(self.cur_file):
        with open(self.cur_file) as f:
            self.cur_id = util.decode(f.read()).strip()
    # Optional settings: use the configured value when present,
    # otherwise the documented default.
    self.pre_download = (conf.getboolean('pre_download')
                         if 'pre_download' in conf else False)
    self.loadCache()
    self.clearCache()
    self.saveCache()
    self.init_count = (conf.getint('init_count')
                       if 'init_count' in conf else 1)
    self.proxy_enable = (conf.getboolean('proxy_enable')
                         if 'proxy_enable' in conf else False)
    self.proxy = conf.get('proxy') if 'proxy' in conf else None
    self.updating = False
    # Kick off an update right away when configured to do so.
    if 'update_on_startup' in conf and conf.getboolean('update_on_startup'):
        self.update()