Beispiel #1
0
def crawl(phantom_port=9802):
    """
    Scrape the weather.com.cn one-day forecast page for every known city.

    For each ``(key, value)`` pair yielded by ``crawl_city_data()`` the page
    ``http://www.weather.com.cn/weather1d/<key>.shtml`` is loaded in the
    browser returned by ``init_phantom_js`` and three values are extracted:
    the driving restriction, the temperature and the publish time. Every
    non-blank value is handed to ``set_string`` / ``set_num`` together with a
    Redis client connected to 127.0.0.1:6379, db 0 (presumably to be stored
    there -- the helpers are defined elsewhere).

    Errors are printed rather than raised; an exception inside the loop still
    stops the remaining cities from being processed.

    :param phantom_port: port forwarded to ``init_phantom_js``
    :return: None
    """
    driver = None

    try:
        driver = init_phantom_js(phantom_port, DesiredCapabilities.CHROME)
        redis = redis_lib.StrictRedis(host='127.0.0.1', port=6379, db=0)

        city_id = crawl_city_data()

        for key, value in city_id.iteritems():
            print(' '.join(['Current', str(key), str(value)]))

            driver.get(''.join(['http://www.weather.com.cn/weather1d/', key, '.shtml']))

            # NOTE: an explicit WebDriverWait on
            # //*[@id="today"]/div[1]/div/div[4]/span was tried here before
            # and is currently disabled.

            # Driving restriction
            find, data = is_not_blank(xian_xing(driver))
            if find:
                set_string(redis, WEATHER_KEY_XX_PREFIX, key, value, data)

            # Temperature
            find, data = is_not_blank(qi_wen(driver))
            if find:
                set_num(redis, WEATHER_KEY_PREFIX, key, value, data)

            # Publish / update time
            find, data = is_not_blank(fa_bu_shi_jian(driver))
            if find and data is not None:
                data_ = data.encode('utf8')
                # Membership test instead of str.index(): index() raises
                # ValueError when the marker is missing (it never returns -1),
                # which used to abort the crawl for all remaining cities.
                if '实况' in data_:
                    # Only the text without the marker is passed on.
                    data = data_.replace('实况', '')
                    set_string(redis, WEATHER_KEY_UD_PREFIX, key, value, data)
    except Exception as e:
        print(str(e))
    finally:
        # Always release the browser session, even if the crawl failed
        # part-way. Assumes init_phantom_js returns a Selenium WebDriver
        # (it is driven through .get() above) -- confirm it exposes quit().
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Keep the function's "print, never raise" behaviour.
                print(str(e))
Beispiel #2
0
def crawl(phantom_port=9801):
    """
    Scrape the weather.com.cn air-quality page for every known city.

    For each ``(key, value)`` pair yielded by ``crawl_city_data()`` the page
    ``http://www.weather.com.cn/air/?city=<key>`` is loaded in the browser
    returned by ``init_phantom_js`` and three values are extracted: PM2.5,
    the pollution level and PM10. Every non-blank value is handed to
    ``set_num`` / ``set_string`` together with a Redis client connected to
    127.0.0.1:6379, db 0 (presumably to be stored there -- the helpers are
    defined elsewhere).

    Errors are printed rather than raised; an exception inside the loop still
    stops the remaining cities from being processed.

    :param phantom_port: port forwarded to ``init_phantom_js``
    :return: None
    """
    driver = None

    try:
        driver = init_phantom_js(phantom_port, DesiredCapabilities.FIREFOX)
        redis = redis_lib.StrictRedis(host='127.0.0.1', port=6379, db=0)

        city_id = crawl_city_data()

        for key, value in city_id.iteritems():
            driver.get(''.join(['http://www.weather.com.cn/air/?city=', key]))

            # NOTE: an explicit WebDriverWait on the '.aqi' CSS selector was
            # tried here before and is currently disabled.

            # PM2.5
            find, data = is_not_blank(pm25(driver))
            if find:
                set_num(redis, WEATHER_KEY_PM25_PREFIX, key, value, data)

            # Pollution level
            find, data = is_not_blank(wrcd(driver))
            if find:
                set_string(redis, WEATHER_KEY_WRCD_PREFIX, key, value, data)

            # PM10
            find, data = is_not_blank(pm10(driver))
            if find:
                set_num(redis, WEATHER_KEY_PM10_PREFIX, key, value, data)
    except Exception as e:
        print(str(e))
    finally:
        # Always release the browser session, even if the crawl failed
        # part-way. Assumes init_phantom_js returns a Selenium WebDriver
        # (it is driven through .get() above) -- confirm it exposes quit().
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Keep the function's "print, never raise" behaviour.
                print(str(e))