def get_desc(cityname, cityshort):
    """Return an air-quality (AQI) description for a city, with a cache fast path.

    cityname  -- display name used inside the rendered message
    cityshort -- city slug; cache-key suffix and path segment of the aqicn URL

    Returns (text, attaches) on a cache miss. NOTE(review): on a cache hit it
    returns only the cached text string, not the (text, attaches) tuple --
    callers must cope with both shapes (pre-existing inconsistency, left as-is).
    """
    if cache is not None:
        r = cache.get('airpollution.%s' % (cityshort))
        if r:
            return r
    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())
    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text
    soup = BeautifulSoup(p)
    # BUG FIX: id was written as "aqiwgtinfo'" (stray trailing quote), which
    # matches no element, so .text raised AttributeError on None.
    aqiwgtinfo = soup.find(id="aqiwgtinfo").text
    aqivalue = soup.find("div", {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        # Cache only the rendered text for 30 minutes.
        cache.set('airpollution.%s' % (cityshort), text, 1800)
    image_url = soup.find(id='tr_pm25').find(
        id='td_pm25').find('img').attrs.get('src')
    title = soup.find('title').text
    attaches = [
        gen_attachment(text, image_url, title=title, title_link=title_link)
    ]
    return text, attaches
def fetch2cache(node_name): print 'Fetch {}'.format(node_name) r = requests.get(FEED_URL.format(node_name), verify=False) root = etree.fromstring(r.text.encode('utf-8')) entries = root.findall('{http://www.w3.org/2005/Atom}entry') node_key = NODE_KEY.format(node_name) feeds = cache.get(node_key) or [] new_feeds = [] for entry in entries: topic = {} id = entry[2].text.rpartition('/')[-1] key = TOPIC_KEY.format(id) for el in entry: for tag in TAGS: if el.tag.endswith(tag): res = el.text if tag in ('published', 'updated'): res = datetime.strptime(res, '%Y-%m-%dT%H:%M:%SZ') topic[tag] = res topic['node'] = node_name cache.set(key, topic, ONE_DAY) new_feeds.append(id) if new_feeds: new_feeds += feeds[:MAX_FEEDS_LEN - len(new_feeds)] interval = get_updated_interval(node_name, new_feeds) cache.set(node_key, new_feeds, interval)
# NOTE(review): this is a duplicate (modulo quote style) of the fetch2cache
# defined earlier in this file; being defined later, this copy is the one
# that survives at import time. Consider removing one of the two.
def fetch2cache(node_name):
    """Fetch the Atom feed for *node_name* and store its topics in the cache."""
    # Python 2 print statement: log which node is being fetched.
    print "Fetch {}".format(node_name)
    # TLS verification disabled -- presumably the feed host's certificate
    # chain was problematic; NOTE(review): confirm this is still needed.
    r = requests.get(FEED_URL.format(node_name), verify=False)
    root = etree.fromstring(r.text.encode("utf-8"))
    entries = root.findall("{http://www.w3.org/2005/Atom}entry")
    node_key = NODE_KEY.format(node_name)
    feeds = cache.get(node_key) or []
    new_feeds = []
    for entry in entries:
        topic = {}
        # Topic id: last path segment of the entry's third child's text
        # (shadows the builtin `id` -- pre-existing naming choice).
        id = entry[2].text.rpartition("/")[-1]
        key = TOPIC_KEY.format(id)
        for el in entry:
            for tag in TAGS:
                if el.tag.endswith(tag):
                    res = el.text
                    if tag in ("published", "updated"):
                        # Atom timestamps are parsed into datetime objects.
                        res = datetime.strptime(res, "%Y-%m-%dT%H:%M:%SZ")
                    topic[tag] = res
        topic["node"] = node_name
        # Each topic dict is cached individually for one day.
        cache.set(key, topic, ONE_DAY)
        new_feeds.append(id)
    if new_feeds:
        # Cap the id list at MAX_FEEDS_LEN: new ids first, then old ones.
        new_feeds += feeds[: MAX_FEEDS_LEN - len(new_feeds)]
        interval = get_updated_interval(node_name, new_feeds)
        cache.set(node_key, new_feeds, interval)
# NOTE(review): duplicate of the get_desc defined earlier in this file; this
# later definition is the one that survives at import time.
def get_desc(cityname, cityshort):
    """Return an air-quality (AQI) description for a city, with a cache fast path.

    cityname  -- display name used inside the rendered message
    cityshort -- city slug; cache-key suffix and path segment of the aqicn URL

    Returns (text, attaches) on a cache miss. NOTE(review): on a cache hit it
    returns only the cached text string, not the (text, attaches) tuple --
    callers must cope with both shapes (pre-existing inconsistency, left as-is).
    """
    if cache is not None:
        r = cache.get('airpollution.%s' % (cityshort))
        if r:
            return r
    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())
    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text
    soup = BeautifulSoup(p)
    # BUG FIX: id was written as "aqiwgtinfo'" (stray trailing quote), which
    # matches no element, so .text raised AttributeError on None.
    aqiwgtinfo = soup.find(id="aqiwgtinfo").text
    aqivalue = soup.find("div", {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        # Cache only the rendered text for 30 minutes.
        cache.set('airpollution.%s' % (cityshort), text, 1800)
    image_url = soup.find(id='tr_pm25').find(id='td_pm25').find(
        'img').attrs.get('src')
    title = soup.find('title').text
    attaches = [gen_attachment(text, image_url, title=title,
                               title_link=title_link)]
    return text, attaches
def handle(data):
    """Return one random item scraped from the qiubai RSS feed.

    Results are cached for 30 minutes under the module-level ``key``.
    Raises Exception when the feed yields no parsable items.
    """
    if cache is not None:
        cached = cache.get(key)
        if cached:
            return random.choice(cached)
    # BUG FIX: the urlopen response was never closed; close it once read.
    resp = urllib2.urlopen('http://feedproxy.feedburner.com/qiubai',
                           timeout=60)
    try:
        body = resp.read()
    finally:
        resp.close()
    # Raw string replaces the original '<\!...' pattern whose '\!' was a
    # spurious string escape (identical match semantics, but a
    # DeprecationWarning on Python 3).
    items = re.findall(r'<!\[CDATA\[<p>(.*)<br/>', body)
    if not items:
        # BUG FIX: raise an instance with a message rather than the bare
        # class, so failures are diagnosable; callers catching Exception
        # are unaffected.
        raise Exception('no items parsed from qiubai feed')
    if cache is not None:
        cache.set(key, items, 1800)
    return random.choice(items)