def fetch2cache(node_name):
    print 'Fetch {}'.format(node_name)
    r = requests.get(FEED_URL.format(node_name), verify=False)
    root = etree.fromstring(r.text.encode('utf-8'))
    entries = root.findall('{http://www.w3.org/2005/Atom}entry')
    node_key = NODE_KEY.format(node_name)
    feeds = cache.get(node_key) or []
    new_feeds = []
    for entry in entries:
        topic = {}
        # The third child of an Atom <entry> is its <id>; the topic id is the
        # last path segment of that URL.
        id = entry[2].text.rpartition('/')[-1]
        key = TOPIC_KEY.format(id)
        for el in entry:
            for tag in TAGS:
                if el.tag.endswith(tag):
                    res = el.text
                    if tag in ('published', 'updated'):
                        res = datetime.strptime(res, '%Y-%m-%dT%H:%M:%SZ')
                    topic[tag] = res
        topic['node'] = node_name
        # Cache each topic individually for a day.
        cache.set(key, topic, ONE_DAY)
        new_feeds.append(id)
    if new_feeds:
        # Keep at most MAX_FEEDS_LEN ids and re-cache the node list with an
        # interval derived from how often its topics get updated.
        new_feeds += feeds[:MAX_FEEDS_LEN - len(new_feeds)]
        interval = get_updated_interval(node_name, new_feeds)
        cache.set(node_key, new_feeds, interval)
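The V2EX feed snippets in this section lean on module-level names (FEED_URL, NODE_KEY, TOPIC_KEY, TAGS, NODES, MAX_FEEDS_LEN, the cache client, and so on) that are defined elsewhere in the plugin. A minimal sketch of plausible definitions, assuming per-node Atom feeds and a get/set cache with a timeout argument, might look like the following; the URL patterns, node list, and key formats are guesses, not the project's actual values.

# Hypothetical module-level definitions assumed by the feed code; the real
# plugin ships its own, and the URL patterns below are guesses.
FEED_URL = 'https://www.v2ex.com/feed/{}.xml'   # per-node Atom feed (assumed)
TOPIC_URL = 'https://www.v2ex.com/t/{}'         # topic permalink (assumed)
NODE_URL = 'https://www.v2ex.com/go/{}'         # node page (assumed)
NODE_KEY = 'v2ex:node:{}'                       # cache key for a node's topic id list
TOPIC_KEY = 'v2ex:topic:{}'                     # cache key for one topic dict
NODES = ['python', 'programmer']                # nodes to poll (assumed)
TAGS = ('title', 'published', 'updated')        # Atom child tags copied into each topic
ONE_MINUTE = 60
ONE_DAY = 60 * 60 * 24
MAX_FEEDS_LEN = 20


class DictCache(object):
    """Minimal stand-in for the cache client: get/set with an ignored timeout."""

    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data.get(key)

    def set(self, key, value, timeout=None):
        # A real backend (memcached, Redis) would expire the key after `timeout`.
        self._data[key] = value


cache = DictCache()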
def get_desc(cityname, cityshort):
    if cache is not None:
        r = cache.get('airpollution.%s' % (cityshort))
        if r:
            return r
    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())
    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text
    soup = BeautifulSoup(p)
    aqiwgtinfo = soup.find(id='aqiwgtinfo').text
    aqivalue = soup.find('div', {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        # Only the plain text is cached for 30 minutes, so the cached branch
        # at the top returns text without attachments.
        cache.set('airpollution.%s' % (cityshort), text, 1800)
    image_url = soup.find(id='tr_pm25').find(
        id='td_pm25').find('img').attrs.get('src')
    title = soup.find('title').text
    attaches = [
        gen_attachment(text, image_url, title=title, title_link=title_link)
    ]
    return text, attaches
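get_desc() also depends on two names defined elsewhere: the `headers` dict sent to aqicn.org and the `gen_attachment` helper that wraps the scraped numbers into a chat attachment. A rough sketch under those assumptions follows; the field names mirror Slack's attachment format, but the real helper may differ.

# Hypothetical stand-ins for names get_desc() expects at module level.
headers = {
    'User-Agent': 'Mozilla/5.0 (compatible; slack-bot)',  # assumed UA string
}


def gen_attachment(text, image_url, title=None, title_link=None):
    # Sketch of an attachment builder matching the call site above; the real
    # project's helper may accept different arguments or defaults.
    return {
        'fallback': text,
        'text': text,
        'image_url': image_url,
        'title': title,
        'title_link': title_link,
    }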
def fetch(force=False):
    ids = set()
    for node in NODES:
        node_key = NODE_KEY.format(node)
        res = cache.get(node_key)
        if res and not force:
            ids.update(res)
            continue
        fetch2cache(node)
        ids.update(cache.get(node_key))
    ids = list(set(ids))

    def _key(id):
        topic = cache.get(TOPIC_KEY.format(id))
        if not topic:
            return datetime(1970, 1, 1)
        return topic['published']

    return sorted(ids, key=_key, reverse=True)[:MAX_FEEDS_LEN]
def handle(data):
    if cache is not None:
        r = cache.get(key)
        if r:
            return random.choice(r)
    r = urllib2.urlopen('http://feedproxy.feedburner.com/qiubai', timeout=60)
    p = r.read()
    # Pull every joke paragraph out of the feed's CDATA blocks.
    r = re.findall('<\!\[CDATA\[<p>(.*)<br/>', p)
    if r:
        if cache is not None:
            cache.set(key, r, 1800)
        return random.choice(r)
    else:
        raise Exception
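The qiubai handler reads and writes a module-level `key` that is not shown above. A hypothetical definition and smoke test, assuming the plugin receives the usual `data` dict (the handler never inspects it):

key = 'qiubai.items'  # assumed cache key; the real plugin defines its own

if __name__ == '__main__':
    # Any payload works here because handle() ignores `data`.
    print(handle({'message': u'来个糗事'}))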
def handle(data):
    message = data['message']
    # u'刷新' means "refresh": force a re-fetch when it appears in the message.
    ids = fetch(force=(True if u'刷新' in message else False))
    contents = []
    for id in ids:
        topic = cache.get(TOPIC_KEY.format(id))
        if not topic:
            continue
        node = topic['node']
        # Slack-style link syntax: <url|label>.
        msg = u'<{0}|{1} [{2}]> <{3}|{4}>'.format(
            TOPIC_URL.format(id), cgi.escape(topic['title']),
            topic['published'], NODE_URL.format(node), node)
        contents.append(msg)
    return '\n'.join(contents)
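Calling the feed handler directly is a convenient way to check the output format; including u'刷新' ("refresh") in the message forces fetch() to bypass the per-node cache. A hypothetical invocation, assuming the constants and cache sketched earlier:

if __name__ == '__main__':
    print(handle({'message': u'v2ex 刷新'}))   # forces a fresh fetch
    print(handle({'message': u'v2ex'}))        # second call is served from cache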
def get_updated_interval(node_name, feeds, default=ONE_DAY):
    """Return the smallest gap (in seconds) between consecutive topic updates,
    floored at ONE_MINUTE, for use as the node list's cache timeout."""
    updated_times = []
    for id in feeds:
        topic = cache.get(TOPIC_KEY.format(id))
        if topic:
            updated_times.append(topic['updated'])
        else:
            print 'topic {} not cached!'.format(id)
    min = default
    for i in range(len(updated_times) - 1):
        sec = (updated_times[i] - updated_times[i + 1]).total_seconds()
        if sec < min:
            min = sec
        if min < ONE_MINUTE:
            min = ONE_MINUTE
            break
    return min
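To see the interval logic in action, the sketch below seeds three fake topics ten minutes apart using the TOPIC_KEY/cache stand-ins from earlier; the smallest gap between consecutive updates (600 seconds) becomes the cache timeout, and anything under ONE_MINUTE would be floored to 60.

from datetime import datetime, timedelta

# Hypothetical check of get_updated_interval() with three fake cached topics.
now = datetime(2015, 1, 1, 12, 0, 0)
for i, topic_id in enumerate(['1', '2', '3']):
    cache.set(TOPIC_KEY.format(topic_id),
              {'updated': now - timedelta(minutes=10 * i)}, ONE_DAY)

print(get_updated_interval('python', ['1', '2', '3']))  # -> 600.0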