Example 1
import requests
from bs4 import BeautifulSoup


def get_desc(cityname, cityshort):
    # `cache`, `headers` and `gen_attachment` are module-level globals
    # defined elsewhere in the original source.
    if cache is not None:
        r = cache.get('airpollution.%s' % cityshort)
        if r:
            # Note: the cached value is only `text`, while the uncached
            # path below returns a (text, attaches) tuple.
            return r

    title_link = 'http://aqicn.org/city/{}/cn/'.format(cityshort.lower())

    r = requests.get(title_link, headers=headers)
    r.encoding = 'utf-8'
    p = r.text

    soup = BeautifulSoup(p, 'html.parser')  # explicit parser avoids a bs4 warning
    aqiwgtinfo = soup.find(id='aqiwgtinfo').text  # was id="aqiwgtinfo'", a stray quote
    aqivalue = soup.find('div', {'class': 'aqivalue'}).text
    min_pm25 = soup.find(id='min_pm25').text
    max_pm25 = soup.find(id='max_pm25').text
    text = '{0}实时空气质量指数(AQI): {1} {2} [最大:{3}, 最小:{4}]'.format(
        cityname.encode('utf-8'), aqiwgtinfo.encode('utf-8'), aqivalue,
        max_pm25, min_pm25)
    if cache is not None:
        cache.set('airpollution.%s' % cityshort, text, 1800)  # 30-minute TTL
    image_url = soup.find(id='tr_pm25').find(
        id='td_pm25').find('img').attrs.get('src')
    title = soup.find('title').text
    attaches = [
        gen_attachment(text, image_url, title=title, title_link=title_link)
    ]
    return text, attaches
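
A minimal way to exercise get_desc, assuming stand-ins for the module globals the snippet relies on (cache, headers and gen_attachment are not shown in the original, so the shapes below are guesses):

# Hypothetical stand-ins; none of these definitions come from the original source.
cache = None  # disables caching; any object with get(key)/set(key, value, ttl) would do
headers = {'User-Agent': 'Mozilla/5.0'}

def gen_attachment(text, image_url, title=None, title_link=None):
    # Assumed shape: a Slack-style attachment dict built from the scraped fields.
    return {'text': text, 'image_url': image_url,
            'title': title, 'title_link': title_link}

text, attaches = get_desc(u'北京', 'beijing')
print text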
Example 2
import requests
from datetime import datetime
from xml.etree import ElementTree as etree  # lxml.etree exposes the same API here


def fetch2cache(node_name):
    # FEED_URL, NODE_KEY, TOPIC_KEY, TAGS, ONE_DAY, MAX_FEEDS_LEN, cache and
    # get_updated_interval are module-level globals defined elsewhere.
    print 'Fetch {}'.format(node_name)
    r = requests.get(FEED_URL.format(node_name), verify=False)
    root = etree.fromstring(r.text.encode('utf-8'))
    entries = root.findall('{http://www.w3.org/2005/Atom}entry')
    node_key = NODE_KEY.format(node_name)
    feeds = cache.get(node_key) or []
    new_feeds = []
    for entry in entries:
        topic = {'node': node_name}
        # Assumes the third child of an Atom <entry> is its <id>; the topic
        # id is the last path segment of that URL.
        topic_id = entry[2].text.rpartition('/')[-1]  # renamed from `id` to avoid shadowing the builtin
        key = TOPIC_KEY.format(topic_id)
        for el in entry:
            for tag in TAGS:
                if el.tag.endswith(tag):
                    res = el.text
                    if tag in ('published', 'updated'):
                        res = datetime.strptime(res, '%Y-%m-%dT%H:%M:%SZ')
                    topic[tag] = res
        cache.set(key, topic, ONE_DAY)
        new_feeds.append(topic_id)
    if new_feeds:
        # Prepend the new ids, keeping at most MAX_FEEDS_LEN entries in total.
        new_feeds += feeds[:MAX_FEEDS_LEN - len(new_feeds)]
        interval = get_updated_interval(node_name, new_feeds)
        cache.set(node_key, new_feeds, interval)
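
fetch2cache depends on several module-level names that the snippet does not define. A configuration sketch with placeholder values, purely to make the function runnable (the endpoint, key formats and limits below are assumptions, not the original project's settings):

# Hypothetical placeholders; not the original project's values.
FEED_URL = 'https://example.com/feed/{}.xml'  # some Atom feed per node
NODE_KEY = 'node.{}'
TOPIC_KEY = 'topic.{}'
TAGS = ('title', 'published', 'updated', 'content')
ONE_DAY = 86400
MAX_FEEDS_LEN = 100

class DictCache(object):
    # Toy in-memory cache that ignores expiry; anything exposing
    # get(key) and set(key, value, ttl) matches the usage above.
    def __init__(self):
        self._data = {}
    def get(self, key):
        return self._data.get(key)
    def set(self, key, value, ttl):
        self._data[key] = value

cache = DictCache()

def get_updated_interval(node_name, feeds):
    # Assumed contract: returns a cache TTL in seconds for the node's feed list.
    return 600

fetch2cache('python')  # hypothetical node name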
Example 3
import random
import re
import urllib2


def handle(data):
    # `cache` and `key` are module-level globals defined elsewhere in the
    # original source.
    if cache is not None:
        r = cache.get(key)
        if r:
            return random.choice(r)
    r = urllib2.urlopen('http://feedproxy.feedburner.com/qiubai', timeout=60)
    p = r.read()
    # Pull the text of each CDATA-wrapped item out of the feed markup.
    r = re.findall(r'<!\[CDATA\[<p>(.*)<br/>', p)
    if r:
        if cache is not None:
            cache.set(key, r, 1800)  # cache the parsed list for 30 minutes
        return random.choice(r)
    else:
        raise Exception('failed to parse qiubai feed')
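
To try handle directly, the undefined globals cache and key need values; both below are assumptions, not part of the original snippet:

cache = None  # skip caching so the function always fetches and parses the feed
key = 'qiubai'  # hypothetical cache key

print handle(None)  # the data argument is unused; prints one random scraped item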