Example #1
0
def fetch2cache(node_name):
    """Fetch the Atom feed for *node_name* and cache its topics.

    Each entry is stored under TOPIC_KEY for ONE_DAY; the node's list of
    topic ids is stored under NODE_KEY with a refresh interval derived
    from how often the node updates.
    """
    # Single-argument print() is valid in both Python 2 and Python 3.
    print('Fetch {}'.format(node_name))
    # NOTE(review): verify=False disables TLS certificate checking --
    # confirm this is intentional.
    r = requests.get(FEED_URL.format(node_name), verify=False)
    root = etree.fromstring(r.text.encode('utf-8'))
    entries = root.findall('{http://www.w3.org/2005/Atom}entry')
    node_key = NODE_KEY.format(node_name)
    feeds = cache.get(node_key) or []
    new_feeds = []
    for entry in entries:
        topic = {}
        # Renamed from `id` to avoid shadowing the builtin.
        topic_id = entry[2].text.rpartition('/')[-1]
        key = TOPIC_KEY.format(topic_id)
        for el in entry:
            for tag in TAGS:
                if el.tag.endswith(tag):
                    res = el.text
                    if tag in ('published', 'updated'):
                        res = datetime.strptime(res, '%Y-%m-%dT%H:%M:%SZ')
                    topic[tag] = res
        # Hoisted out of the element loop: it was re-assigned per element.
        topic['node'] = node_name
        cache.set(key, topic, ONE_DAY)
        new_feeds.append(topic_id)
    if new_feeds:
        # Keep at most MAX_FEEDS_LEN ids: new ones first, then old ones.
        new_feeds += feeds[:MAX_FEEDS_LEN - len(new_feeds)]
        interval = get_updated_interval(node_name, new_feeds)
        cache.set(node_key, new_feeds, interval)
Example #2
0
def get_desc(cityname, cityshort):
    """Return (text, attaches) describing air quality for a city.

    The text is cached for 30 minutes under 'airpollution.<cityshort>'.
    NOTE(review): a cache hit returns the cached value (the plain text
    string) while a miss returns a (text, attaches) tuple -- confirm
    callers handle both shapes.
    """
    cache_key = 'airpollution.%s' % (cityshort)
    if cache is not None:
        r = cache.get(cache_key)
        if r:
            return r

    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())

    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text

    # Explicit parser silences BeautifulSoup's "no parser specified"
    # warning and keeps parsing deterministic across environments.
    soup = BeautifulSoup(p, 'html.parser')
    # Bug fix: the id previously contained a stray trailing quote
    # ("aqiwgtinfo'"), which can never match an element.
    aqiwgtinfo = soup.find(id='aqiwgtinfo').text
    aqivalue = soup.find('div', {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        cache.set(cache_key, text, 1800)
    image_url = soup.find(id='tr_pm25').find(
        id='td_pm25').find('img').attrs.get('src')
    title = soup.find('title').text
    attaches = [
        gen_attachment(text, image_url, title=title, title_link=title_link)
    ]
    return text, attaches
Example #3
0
def fetch2cache(node_name):
    """Fetch the Atom feed for *node_name* and cache its topics.

    Each entry is stored under TOPIC_KEY for ONE_DAY; the node's list of
    topic ids is stored under NODE_KEY with a refresh interval derived
    from how often the node updates.
    """
    # Single-argument print() is valid in both Python 2 and Python 3.
    print("Fetch {}".format(node_name))
    # NOTE(review): verify=False disables TLS certificate checking --
    # confirm this is intentional.
    r = requests.get(FEED_URL.format(node_name), verify=False)
    root = etree.fromstring(r.text.encode("utf-8"))
    entries = root.findall("{http://www.w3.org/2005/Atom}entry")
    node_key = NODE_KEY.format(node_name)
    feeds = cache.get(node_key) or []
    new_feeds = []
    for entry in entries:
        topic = {}
        # Renamed from `id` to avoid shadowing the builtin.
        topic_id = entry[2].text.rpartition("/")[-1]
        key = TOPIC_KEY.format(topic_id)
        for el in entry:
            for tag in TAGS:
                if el.tag.endswith(tag):
                    res = el.text
                    if tag in ("published", "updated"):
                        res = datetime.strptime(res, "%Y-%m-%dT%H:%M:%SZ")
                    topic[tag] = res
        # Hoisted out of the element loop: it was re-assigned per element.
        topic["node"] = node_name
        cache.set(key, topic, ONE_DAY)
        new_feeds.append(topic_id)
    if new_feeds:
        # Keep at most MAX_FEEDS_LEN ids: new ones first, then old ones.
        new_feeds += feeds[: MAX_FEEDS_LEN - len(new_feeds)]
        interval = get_updated_interval(node_name, new_feeds)
        cache.set(node_key, new_feeds, interval)
Example #4
0
def get_desc(cityname, cityshort):
    """Return (text, attaches) describing air quality for a city.

    The text is cached for 30 minutes under 'airpollution.<cityshort>'.
    NOTE(review): a cache hit returns the cached value (the plain text
    string) while a miss returns a (text, attaches) tuple -- confirm
    callers handle both shapes.
    """
    cache_key = 'airpollution.%s' % (cityshort)
    if cache is not None:
        r = cache.get(cache_key)
        if r:
            return r

    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())

    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text

    # Explicit parser silences BeautifulSoup's "no parser specified"
    # warning and keeps parsing deterministic across environments.
    soup = BeautifulSoup(p, 'html.parser')
    # Bug fix: the id previously contained a stray trailing quote
    # ("aqiwgtinfo'"), which can never match an element.
    aqiwgtinfo = soup.find(id='aqiwgtinfo').text
    aqivalue = soup.find('div', {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        cache.set(cache_key, text, 1800)
    image_url = soup.find(id='tr_pm25').find(id='td_pm25').find(
        'img').attrs.get('src')
    title = soup.find('title').text
    attaches = [gen_attachment(text, image_url, title=title,
                               title_link=title_link)]
    return text, attaches
Example #5
0
def fetch(force=False):
    """Return up to MAX_FEEDS_LEN topic ids across all NODES, newest first.

    Cached per-node id lists are reused unless *force* is true or the
    cache entry has expired, in which case the node is re-fetched.
    """
    ids = set()
    for node in NODES:
        node_key = NODE_KEY.format(node)
        res = cache.get(node_key)
        if res and not force:
            ids.update(res)
            continue
        fetch2cache(node)
        # `or []` guards against a missing cache entry after the fetch
        # (update(None) would raise TypeError).
        ids.update(cache.get(node_key) or [])
    # `ids` is already de-duplicated; just materialize it for sorting.
    ids = list(ids)

    def _key(topic_id):
        # Topics missing from the cache sort last (epoch timestamp).
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if not topic:
            return datetime(1970, 1, 1)
        return topic['published']

    return sorted(ids, key=_key, reverse=True)[:MAX_FEEDS_LEN]
Example #6
0
def fetch(force=False):
    """Return up to MAX_FEEDS_LEN topic ids across all NODES, newest first.

    Cached per-node id lists are reused unless *force* is true or the
    cache entry has expired, in which case the node is re-fetched.
    """
    ids = set()
    for node in NODES:
        node_key = NODE_KEY.format(node)
        res = cache.get(node_key)
        if res and not force:
            ids.update(res)
            continue
        fetch2cache(node)
        # `or []` guards against a missing cache entry after the fetch
        # (update(None) would raise TypeError).
        ids.update(cache.get(node_key) or [])
    # `ids` is already de-duplicated; just materialize it for sorting.
    ids = list(ids)

    def _key(topic_id):
        # Topics missing from the cache sort last (epoch timestamp).
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if not topic:
            return datetime(1970, 1, 1)
        return topic["published"]

    return sorted(ids, key=_key, reverse=True)[:MAX_FEEDS_LEN]
Example #7
0
def handle(data):
    """Return a random joke scraped from the qiubai feed proxy.

    The scraped list is cached for 30 minutes under the module-level
    `key`. Raises Exception when the feed yields no matching items.
    """
    if cache is not None:
        r = cache.get(key)
        if r:
            return random.choice(r)
    resp = urllib2.urlopen('http://feedproxy.feedburner.com/qiubai',
                           timeout=60)
    try:
        p = resp.read()
    finally:
        # Close the connection even if read() fails (resource leak fix).
        resp.close()
    # Raw string avoids invalid-escape warnings; the compiled pattern is
    # byte-identical to the original.
    r = re.findall(r'<\!\[CDATA\[<p>(.*)<br/>', p)
    if r:
        if cache is not None:
            cache.set(key, r, 1800)
        return random.choice(r)
    else:
        # Same exception type, now with a diagnosable message.
        raise Exception('no qiubai entries matched the feed')
Example #8
0
def handle(data):
    """Format the latest cached topics as Slack-style links, one per line.

    A message containing u"刷新" forces a re-fetch of all nodes.
    """
    message = data["message"]
    # `in` already yields a bool; `True if ... else False` was redundant.
    ids = fetch(force=u"刷新" in message)
    contents = []
    # Renamed from `id` to avoid shadowing the builtin.
    for topic_id in ids:
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if not topic:
            continue
        node = topic["node"]
        # NOTE(review): cgi.escape was removed in Python 3.8; consider
        # html.escape if this code migrates.
        msg = u"<{0}|{1} [{2}]>   <{3}|{4}>".format(
            TOPIC_URL.format(topic_id), cgi.escape(topic["title"]), topic["published"], NODE_URL.format(node), node
        )
        contents.append(msg)
    return "\n".join(contents)
Example #9
0
def handle(data):
    """Format the latest cached topics as Slack-style links, one per line.

    A message containing u"刷新" forces a re-fetch of all nodes.
    """
    message = data['message']
    # `in` already yields a bool; `True if ... else False` was redundant.
    ids = fetch(force=u'刷新' in message)
    contents = []
    # Renamed from `id` to avoid shadowing the builtin.
    for topic_id in ids:
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if not topic:
            continue
        node = topic['node']
        # NOTE(review): cgi.escape was removed in Python 3.8; consider
        # html.escape if this code migrates.
        msg = u'<{0}|{1} [{2}]>   <{3}|{4}>'.format(TOPIC_URL.format(topic_id),
                                                    cgi.escape(topic['title']),
                                                    topic['published'],
                                                    NODE_URL.format(node),
                                                    node)
        contents.append(msg)
    return '\n'.join(contents)
Example #10
0
def get_updated_interval(node_name, feeds, default=ONE_DAY):
    """Return a cache TTL (seconds) for *node_name*'s feed list.

    The TTL is the smallest gap between consecutive topics' `updated`
    times, clamped to at least ONE_MINUTE; *default* is returned when no
    smaller gap is found.
    """
    updated_times = []
    # Renamed from `id` to avoid shadowing the builtin.
    for topic_id in feeds:
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if topic:
            updated_times.append(topic['updated'])
        else:
            # Single-argument print() is valid in both Python 2 and 3.
            print('topic {} not cached!'.format(topic_id))
    # Renamed from `min`, which shadowed the builtin.
    interval = default
    for i in range(len(updated_times) - 1):
        sec = (updated_times[i] - updated_times[i + 1]).total_seconds()
        if sec < interval:
            interval = sec
        if interval < ONE_MINUTE:
            interval = ONE_MINUTE
            break
    return interval
Example #11
0
def get_updated_interval(node_name, feeds, default=ONE_DAY):
    """Return a cache TTL (seconds) for *node_name*'s feed list.

    The TTL is the smallest gap between consecutive topics' `updated`
    times, clamped to at least ONE_MINUTE; *default* is returned when no
    smaller gap is found.
    """
    updated_times = []
    # Renamed from `id` to avoid shadowing the builtin.
    for topic_id in feeds:
        topic = cache.get(TOPIC_KEY.format(topic_id))
        if topic:
            updated_times.append(topic["updated"])
        else:
            # Single-argument print() is valid in both Python 2 and 3.
            print("topic {} not cached!".format(topic_id))
    # Renamed from `min`, which shadowed the builtin.
    interval = default
    for i in range(len(updated_times) - 1):
        sec = (updated_times[i] - updated_times[i + 1]).total_seconds()
        if sec < interval:
            interval = sec
        if interval < ONE_MINUTE:
            interval = ONE_MINUTE
            break
    return interval
Example #12
0
 def _key(id):
     """Sort key: a topic's publish time, or the epoch when uncached."""
     topic = cache.get(TOPIC_KEY.format(id))
     return topic['published'] if topic else datetime(1970, 1, 1)
Example #13
0
 def _key(id):
     """Sort key: a topic's publish time, or the epoch when uncached."""
     topic = cache.get(TOPIC_KEY.format(id))
     return topic["published"] if topic else datetime(1970, 1, 1)