Example #1
0
 def update(self):
     """Fetch the configured RSS feed and return its new audio items.

     The feed URL is read from the ``rss`` key of ``self.source.conf`` and
     fetched through ``self.opener`` using the ``timeout`` config value
     (default 30).  Every ``channel/item`` is examined in document order:

     * the first ``enclosure`` whose ``type`` starts with ``audio/``
       supplies the song URL; items without one are skipped;
     * items without a usable ``guid`` are skipped with a warning, since
       the guid is what gets compared against ``self.last_id``;
     * scanning stops at the item whose guid equals ``self.last_id``;
     * when ``self.last_id`` is not set, scanning stops once
       ``self.init_count`` songs have been collected.

     Returns:
         A list of ``Song`` objects in reverse document order (presumably
         oldest first, assuming the feed lists newest items first).
         ``title`` and ``pubDate`` are ``None`` when the feed omits them.
     """
     def child_text(node, tag):
         # Text of the first matching child, or None when the child is
         # absent, so one incomplete item cannot abort the whole update.
         child = node.find(tag)
         return None if child is None else child.text

     rss = self.source.conf.get('rss')
     logger.debug(u'解析 rss %s', rss)
     response = self.opener.open(rss, timeout=config.getint('timeout', 30))
     try:
         tree = etree.parse(response)
     finally:
         # The document is fully parsed (or parsing failed); either way
         # the connection must be released.
         response.close()

     songs = []
     for item in tree.findall('channel/item'):
         song = Song()
         for e in item.findall('enclosure'):
             t = e.get('type')
             if t and t.startswith('audio/'):
                 song.url = e.get('url')
                 break
         else:
             # No audio enclosure: nothing to download for this item.
             continue

         guid = child_text(item, 'guid')
         if guid is None or not guid.strip():
             logger.warning(u'跳过缺少 guid 的条目 %s', song.url)
             continue

         song.title = child_text(item, 'title')
         song.id = guid.strip()
         song.pubDate = child_text(item, 'pubDate')
         if self.last_id and song.id == self.last_id:
             # Everything from here on was already seen by a previous run.
             break
         songs.append(song)
         if not self.last_id and len(songs) >= self.init_count:
             # First run: cap the initial batch.
             break
     songs.reverse()
     return songs
Example #2
0
    def update(self):
        """Scrape soundofhope.org for new songs under a two-level nav label.

        ``label_l1`` and ``label_l2`` are read from ``self.source.conf`` and
        matched as substrings of the link text in the site navigation: first
        a top-level entry containing ``label_l1``, then a sub-entry beneath
        it containing ``label_l2``.  The sub-entry's listing page is then
        walked in document order; for every link the detail page is fetched
        to obtain the publication date and the mp3 URL.

        Scanning stops at the link whose absolute URL equals
        ``self.last_id``, or, when ``self.last_id`` is not set, after
        ``self.init_count`` songs have been collected.

        Returns:
            A list of ``rss.Song`` objects in reverse document order.  The
            list is empty when either label cannot be found (a warning is
            logged in that case).
        """
        def fetch_html(url):
            # Fetch and parse one page; the response is closed even when
            # parsing fails so the connection is not leaked.
            response = self.opener.open(url, timeout=config.getint('timeout', 30))
            try:
                return etree.parse(response, etree.HTMLParser())
            finally:
                response.close()

        def has_label(node, label):
            # Anchors without direct text (e.g. image-only links) have
            # ``text`` set to None and can never match.
            return node.text is not None and node.text.find(label) != -1

        songs = []
        home = 'http://soundofhope.org'
        label_l1 = util.decode(self.source.conf.get('label_l1'))
        label_l2 = util.decode(self.source.conf.get('label_l2'))

        nav = fetch_html(home)
        for l1 in nav.findall('//div[@id="nav-site"]/ul/li[a]'):
            if has_label(l1.find('a'), label_l1):
                break
        else:
            logger.warning(u'没有找到一级标签 label_l1 = %s', label_l1)
            return songs

        for l2 in l1.findall('div/ul/li/a'):
            if has_label(l2, label_l2):
                break
        else:
            # Each placeholder needs its own positional argument; passing a
            # single tuple makes the logging format step fail.
            logger.warning(u'没有找到二级标签 label_l1 = %s label_l2 = %s', label_l1, label_l2)
            return songs

        items = urlparse.urljoin(home, l2.get('href'))

        listing = fetch_html(items)
        for item in listing.findall('//div[@id="CategoryCol_mid"]/div/ul/li/a'):
            url = urlparse.urljoin(items, item.get('href')).strip()
            if self.last_id and self.last_id == url:
                # Everything from here on was already seen by a previous run.
                break
            song = rss.Song()
            song.id = url
            song.title = item.text.strip()
            # The date and the mp3 link only appear on the detail page.
            page = fetch_html(url)
            div = page.find('//div[@id="columnfirstnei2"]')
            pubDate = div.find('div[@class="subtitle"]/span[@class="date"]')
            song.pubDate = pubDate.text
            mp3 = div.find('.//div[@class="mp3_links"]/a')
            song.url = urlparse.urljoin(url, mp3.get('href'))
            songs.append(song)
            if not self.last_id and len(songs) >= self.init_count:
                # First run: cap the initial batch.
                break

        songs.reverse()
        return songs
Example #3
0
 def download(self, song):
     """Download ``song.url`` into a new temp file in the source cache dir.

     The temp file is created in ``self.source.cachedir`` with the suffix
     reported by ``util.getSuffix(song.url)``, falling back to ``.mp3``.
     On success the file path is stored in ``song.file``.

     If opening the URL, reading, or writing fails, the exception
     propagates to the caller; the response and the file are closed and
     the partially written temp file is removed first.
     """
     logger.debug(u'下载歌曲 %s', song.url)
     suffix = util.getSuffix(song.url) or '.mp3'
     fd, path = tempfile.mkstemp(suffix, '', self.source.cachedir)
     completed = False
     try:
         # Wrapping the descriptor guarantees it is closed on every path.
         with os.fdopen(fd, 'wb') as out:
             response = self.opener.open(song.url, timeout=config.getint('timeout', 30))
             try:
                 while True:
                     data = response.read(4096)
                     if not data:
                         break
                     out.write(data)
             finally:
                 response.close()
         completed = True
     finally:
         if not completed:
             # A truncated download must not linger in the cache directory.
             # Done in ``finally`` (not ``except`` + bare ``raise``) so the
             # original exception is the one that propagates.
             try:
                 os.remove(path)
             except OSError:
                 pass
     song.file = path
     logger.debug(u'下载完成 <%s> %s', path, song.url)
Example #4
0
 def _open(self, type="n", sid=None, channel=0, pt=None):
     """Build the request URL from the arguments and open it.

     Each argument becomes a query parameter of the same name:

     * ``type`` and ``sid`` are sent only when truthy;
     * ``channel`` and ``pt`` are sent whenever they are not ``None``
       (so ``channel=0`` is sent); ``pt`` is formatted with one decimal.

     ``from=mainsite`` and ``r=self.random()`` are always added.  The
     query string is appended to ``self.url`` and the result is opened
     through ``self.opener`` with the ``timeout`` config value
     (default 30).

     Returns:
         The response object returned by ``self.opener.open``.
     """
     params = {}
     if type:
         params["type"] = type
     if sid:
         params["sid"] = sid
     if channel is not None:
         params["channel"] = channel
     if pt is not None:
         params["pt"] = "%.1f" % pt
     params["from"] = "mainsite"
     params["r"] = self.random()
     # ``params`` always holds at least "from" and "r", so the query string
     # is appended unconditionally.
     url = "".join([self.url, "?", urllib.urlencode(params)])
     logger.info(u"请求URL %s", util.decode(url))
     return self.opener.open(url, timeout=config.getint("timeout", 30))