def crawl(phantom_port=9802):
    """Scrape weather pages with PhantomJS and store the results in Redis.

    For every ``(key, value)`` pair yielded by ``crawl_city_data()`` the page
    ``http://www.weather.com.cn/weather1d/<key>.shtml`` is loaded and three
    fields are extracted from it and written to the Redis instance at
    127.0.0.1:6379 (db 0):

    * traffic restriction (``xian_xing``)   -> ``WEATHER_KEY_XX_PREFIX``
    * temperature (``qi_wen``)              -> ``WEATHER_KEY_PREFIX``
    * publish time (``fa_bu_shi_jian``)     -> ``WEATHER_KEY_UD_PREFIX``

    Any exception is printed and swallowed; the first failure stops the
    remaining cities from being processed.

    :param phantom_port: port handed to ``init_phantom_js``.
    :return: None
    """
    driver = None
    try:
        driver = init_phantom_js(phantom_port, DesiredCapabilities.CHROME)
        redis = redis_lib.StrictRedis(host='127.0.0.1', port=6379, db=0)
        city_id = crawl_city_data()
        for key, value in city_id.iteritems():
            # Single-argument parenthesised print behaves identically to the
            # Python 2 print statement.
            print(' '.join(['Current', str(key), str(value)]))
            driver.get(''.join(['http://www.weather.com.cn/weather1d/', key, '.shtml']))
            # NOTE: an explicit WebDriverWait on the "today" span used to be
            # sketched here but was disabled; the page is read right away.

            # Traffic restriction
            find, data = is_not_blank(xian_xing(driver))
            if find:
                set_string(redis, WEATHER_KEY_XX_PREFIX, key, value, data)

            # Temperature
            find, data = is_not_blank(qi_wen(driver))
            if find:
                set_num(redis, WEATHER_KEY_PREFIX, key, value, data)

            # Publish time
            find, data = is_not_blank(fa_bu_shi_jian(driver))
            if find and data is not None:
                data_ = data.encode('utf8')
                # Membership test instead of ``index(...) != -1``: str.index
                # raises ValueError when the marker is missing, which used to
                # abort the crawl for every remaining city.
                if '实况' in data_:
                    data = data_.replace('实况', '')
                set_string(redis, WEATHER_KEY_UD_PREFIX, key, value, data)
    except Exception as e:
        print(str(e))
    finally:
        # Release the browser session so repeated crawls do not pile up
        # sessions. Assumes init_phantom_js returns a fresh driver per call --
        # TODO confirm it is not a shared/cached instance.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Cleanup is best effort; keep the "never raises" contract.
                print(str(e))
def crawl(phantom_port=9801):
    """Scrape air-quality pages with PhantomJS and store the results in Redis.

    For every ``(key, value)`` pair yielded by ``crawl_city_data()`` the page
    ``http://www.weather.com.cn/air/?city=<key>`` is loaded and three fields
    are extracted from it and written to the Redis instance at
    127.0.0.1:6379 (db 0):

    * PM2.5 (``pm25``)            -> ``WEATHER_KEY_PM25_PREFIX``
    * pollution level (``wrcd``)  -> ``WEATHER_KEY_WRCD_PREFIX``
    * PM10 (``pm10``)             -> ``WEATHER_KEY_PM10_PREFIX``

    Any exception is printed and swallowed; the first failure stops the
    remaining cities from being processed.

    :param phantom_port: port handed to ``init_phantom_js``.
    :return: None
    """
    driver = None
    try:
        driver = init_phantom_js(phantom_port, DesiredCapabilities.FIREFOX)
        redis = redis_lib.StrictRedis(host='127.0.0.1', port=6379, db=0)
        city_id = crawl_city_data()
        for key, value in city_id.iteritems():
            driver.get(''.join(['http://www.weather.com.cn/air/?city=', key]))
            # NOTE: an explicit WebDriverWait on the ".aqi" element used to be
            # sketched here but was disabled; the page is read right away.

            # PM2.5
            find, data = is_not_blank(pm25(driver))
            if find:
                set_num(redis, WEATHER_KEY_PM25_PREFIX, key, value, data)

            # Pollution level
            find, data = is_not_blank(wrcd(driver))
            if find:
                set_string(redis, WEATHER_KEY_WRCD_PREFIX, key, value, data)

            # PM10
            find, data = is_not_blank(pm10(driver))
            if find:
                set_num(redis, WEATHER_KEY_PM10_PREFIX, key, value, data)
    except Exception as e:
        print(str(e))
    finally:
        # Release the browser session so repeated crawls do not pile up
        # sessions. Assumes init_phantom_js returns a fresh driver per call --
        # TODO confirm it is not a shared/cached instance.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Cleanup is best effort; keep the "never raises" contract.
                print(str(e))