Ejemplo n.º 1
0
    def get_and_store(self, url, depth):
        """
        Download the page at ``url``, store it, and follow its links.

        A URL already present in ``self.url_set`` is skipped. Otherwise it is
        added to the set, fetched, and written to the ``DataStore`` opened on
        ``self.dbfile`` when ``self.keyword`` is empty or occurs in the page.
        The page's links are then passed to ``self.get_hyperlink`` with the
        depth reduced by one. If the download fails, the error is logged and
        nothing is stored or followed.

        :param url: URL of the page to download
        :param depth: depth of ``url`` in the crawl
        """
        # De-duplicate URLs.
        if url in self.url_set:
            log.debug("{} has been crawled".format(url))
            return
        self.url_set.add(url)
        log.debug("add {} to url_set".format(url))

        # Headers sent with the crawler's request.
        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate",
            "Accept-Language":
            "zh-CN,zh;q=0.8,en;q=0.6",
            "Connection":
            "keep-alive",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/57.0.2987.110 Safari/537.36",
        }
        try:
            log.warning("get {}".format(url))
            r = requests.get(url, headers=headers, timeout=10)
            # Use the detected encoding when decoding the body.
            r.encoding = r.apparent_encoding
            r.raise_for_status()
            html = r.content
        except Exception as e:
            # Best-effort crawl: log the failure and give up on this URL.
            log.critical("Failed to get {} depth:{} error:{}".format(
                url, depth, e),
                         exc_info=True)
            return

        # Parse the HTML.
        soup = BeautifulSoup(html, 'lxml')
        # Open the database.
        db = DataStore(self.dbfile)
        try:
            if self.keyword == "":
                db.insert(url, str(None), html)
            else:
                # ``html`` is the raw response body. When the keyword is not
                # of the same type (text keyword vs. bytes body), search the
                # decoded text instead so the membership test cannot raise
                # TypeError.
                if isinstance(self.keyword, type(html)):
                    searchable = html
                else:
                    searchable = r.text
                if self.keyword in searchable:
                    db.insert(url, self.keyword, html)
                else:
                    log.warning(
                        "Cannot find {} in {}".format(self.keyword, url))
        finally:
            # Close on every path, including "keyword not found" and errors.
            db.close()
        # Follow the links in this page, one level shallower.
        self.get_hyperlink(url, soup, depth - 1)
Ejemplo n.º 2
0
from datetime import datetime

import config

from azure_helper import EventHubHelper
from cube_parser import CubeParser
from database import DataStore

# Configure logging from the 'log.config' file, then fetch the logger whose
# name is given by config.logger_name.
logging.config.fileConfig('log.config')
logger = logging.getLogger(config.logger_name)


def myExceptionHook(exctype, value, traceback):
    """
    Log an uncaught exception, then delegate to the default hook.

    Has the ``sys.excepthook`` signature and is installed as that hook by
    the script entry point.

    :param exctype: class of the uncaught exception
    :param value: the exception instance
    :param traceback: traceback object of the exception
    """
    # Pass the full exception triple so the log records the exception type
    # and stack trace, not only the message.
    logger.error(value, exc_info=(exctype, value, traceback))
    sys.__excepthook__(exctype, value, traceback)

if __name__ == '__main__':
    # Send uncaught exceptions through myExceptionHook.
    sys.excepthook = myExceptionHook

    started_at = datetime.utcnow()
    print("Running at %s" % started_at)

    readings = CubeParser().get_data()
    newest = readings[0]
    print("Latest data: %s" % newest["time"])

    if not config.use_event_hub:
        # Database path: convert the parsed data to rows and send them.
        store = DataStore(
            config.db_server,
            config.db_name,
            config.db_user,
            config.db_password,
        )
        store.send_request(store.parse_to_db_format(readings))
    else:
        # Event Hub path: send the parsed data as one JSON document.
        payload = json.dumps(readings)
        EventHubHelper.send_request(
            config.servicebus_namespace,
            config.eventhub_name,
            config.eventhub_key_name,
            config.eventhub_private_key,
            payload,
        )
Ejemplo n.º 3
0
# Configure logging from the 'log.config' file, then fetch the logger whose
# name is given by config.logger_name.
logging.config.fileConfig('log.config')
logger = logging.getLogger(config.logger_name)


def myExceptionHook(exctype, value, traceback):
    """
    Log an uncaught exception, then delegate to the default hook.

    Has the ``sys.excepthook`` signature and is installed as that hook by
    the script entry point.

    :param exctype: class of the uncaught exception
    :param value: the exception instance
    :param traceback: traceback object of the exception
    """
    # Pass the full exception triple so the log records the exception type
    # and stack trace, not only the message.
    logger.error(value, exc_info=(exctype, value, traceback))
    sys.__excepthook__(exctype, value, traceback)
       
       
if __name__ == '__main__':
    sys.excepthook = myExceptionHook

    print("Running IFTTT checker at %s" % datetime.utcnow())
    
    store = DataStore(config.db_server, config.db_name, config.db_user, config.db_password)
    rows = store.getSensorBatteryStatuses()
        
    current_hour = datetime.utcnow().hour
    
    for row in rows:
        sensor_id = row[0]
        battery = row[1]
        cable = row[2]
         
        if battery <= 15 and cable == 0 and current_hour > 19:
            logger.debug("Request charging %s (%s : %s)" % (sensor_id, battery, cable))
            IFTTT.sendEvent(config.ifttt_api_key, sensor_id + config.ifttt_event_on)
        
        # Stop charging when nearing 100    
        if cable == 1 and battery > 96:
Ejemplo n.º 4
0
# Configure logging from the 'log.config' file, then fetch the logger whose
# name is given by config.logger_name.
logging.config.fileConfig('log.config')
logger = logging.getLogger(config.logger_name)


def myExceptionHook(exctype, value, traceback):
    """
    Log an uncaught exception, then delegate to the default hook.

    Has the ``sys.excepthook`` signature and is installed as that hook by
    the script entry point.

    :param exctype: class of the uncaught exception
    :param value: the exception instance
    :param traceback: traceback object of the exception
    """
    # Pass the full exception triple so the log records the exception type
    # and stack trace, not only the message.
    logger.error(value, exc_info=(exctype, value, traceback))
    sys.__excepthook__(exctype, value, traceback)


if __name__ == '__main__':
    # Send uncaught exceptions through myExceptionHook.
    sys.excepthook = myExceptionHook

    print("Running IFTTT checker at %s" % datetime.utcnow())

    store = DataStore(config.db_server, config.db_name, config.db_user,
                      config.db_password)
    rows = store.getSensorBatteryStatuses()

    # Current hour of the day in UTC; gates the charge request below.
    current_hour = datetime.utcnow().hour

    for row in rows:
        # Rows are read positionally: sensor id, battery value, cable value.
        sensor_id = row[0]
        battery = row[1]
        cable = row[2]

        # Request charging when the battery value is 15 or lower, the cable
        # value is 0, and the UTC hour is later than 19.
        # NOTE(review): presumably battery is a percentage and cable == 0
        # means "not plugged in" -- confirm against DataStore.
        if battery <= 15 and cable == 0 and current_hour > 19:
            logger.debug("Request charging %s (%s : %s)" %
                         (sensor_id, battery, cable))
            # The event name is the sensor id followed by
            # config.ifttt_event_on.
            IFTTT.sendEvent(config.ifttt_api_key,
                            sensor_id + config.ifttt_event_on)