Example #1
def main():
    """
    the main method to run mini spider
    """
    # Logs go to ./log/spider.log and ./log/spider.log.wf, rotated daily, kept for 7 days
    log.init_log("./log/spider")
    spider = SpiderEngine.SpiderEngine()
    try:
        opts, args = getopt.getopt(sys.argv[1:], "vhc:")
    except getopt.GetoptError as err:
        logging.error("get option error : %s." % err)
        return
    for o, a in opts:
        if o == "-v":
            version()
            return
        elif o == "-h":
            # spider_engine = _SpiderEngine._SpiderEngine()
            # spider_engine.config_tostring()
            # spider_engine.set_config()
            print "帮助信息:没有帮助^_^"
            return
        elif o == "-c":
            spider.set_config_by_file(a)
        else:
            logging.error("unhandled option")
            print "unhandled option"
            return
    spider.start_work()
    return
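The log module used throughout these examples is not shown. Below is a minimal sketch consistent with the comment above (a .log file for all records plus a .log.wf file for warnings and above, rotated daily, 7 backups kept); the exact helper is an assumption:

import logging
import logging.handlers

def init_log(log_path, level=logging.INFO, backup=7,
             fmt="%(levelname)s: %(asctime)s * %(filename)s:%(lineno)d %(message)s"):
    """Sketch of init_log: log_path + '.log' receives records at `level` and
    above, log_path + '.log.wf' receives WARNING and above; both files are
    rotated at midnight and `backup` old files are kept."""
    formatter = logging.Formatter(fmt)
    logger = logging.getLogger()
    logger.setLevel(level)

    # all records at `level` and above go to the .log file
    handler = logging.handlers.TimedRotatingFileHandler(log_path + ".log",
                                                        when="midnight",
                                                        backupCount=backup)
    handler.setLevel(level)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # warnings and errors are duplicated into the .log.wf file
    handler = logging.handlers.TimedRotatingFileHandler(log_path + ".log.wf",
                                                        when="midnight",
                                                        backupCount=backup)
    handler.setLevel(logging.WARNING)
    handler.setFormatter(formatter)
    logger.addHandler(handler)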
Example #2
def main():
    """
    SendDataClient class, used as the flashshot service
    """

    log.init_log('./logs/send_data_client')

    # If the config says this region needs an IP proxy, cancel the proxy IP before uploading
    # and restore it once done. Also set uping to 1 so that checkIpProxy, which runs every
    # five minutes, will not modify this IP; set it back to 0 after the upload finishes.
    if config.NEED_PROXY:
        configFile = ConfigParser.ConfigParser()
        configFile.read(CONFIGFILE)
        configFile.set("info", "uping", 1)
        with open(CONFIGFILE, "w") as config_fh:
            configFile.write(config_fh)
        logging.info('setProxy("0") ')
        # turn off the local proxy IP before transferring the images
        ipProxy.setProxy("0")

    target_folder = sys.argv[1]
    target_filenames = get_file_list(target_folder)
    upload_files(target_folder, target_filenames)  
    
    # re-enable the local proxy IP after transferring the images
    if config.NEED_PROXY:
        configFile = ConfigParser.ConfigParser()
        configFile.read(CONFIGFILE)
        ip1 = configFile.get("info", "ip1")
        configFile.set("info", "uping", 0)
        with open(CONFIGFILE, "w") as config_fh:
            configFile.write(config_fh)
        enableProxyScript = "python ipProxy.py " + ip1
        os.popen(enableProxyScript)
        # ipProxy.setProxy(ip1)
        logging.info('setProxy ' + ip1)
Example #3
def check_stops(environnement, coverage):
    log.init_log("", "")
    logger = logging.getLogger("vipere")
    logger.info("Vérification des arrets pour le coverage [{}] et sur [{}]".format(coverage, environnement))

    params = json.load(open('../params.json'))
    nav_url = params["environnements"][environnement]["url"]
    nav_key = params["environnements"][environnement]["key"]

    detail_test_result.append(["coverage", "env", "test_datetime", "object_id", "object_type", "test_category", "error", "infos", "error_level", "wkt"])

    load_naming_ref_files("../../Data_scripts/data/audit/reference")
    # make one call per network and per line to keep the requests small
    nav_response_network = requests.get(nav_url + "coverage/{}/networks?count=1000".format(coverage), headers={'Authorization': nav_key})
    if nav_response_network.status_code != 200:
        logger.error(">> the navitia call returned an error")
        return
    for a_network in nav_response_network.json()['networks'] :
        nav_response_line = requests.get(nav_url + "coverage/{}/networks/{}/lines/?count=1000".format(coverage, a_network["id"]),
            headers={'Authorization': nav_key})
        if "lines" in nav_response_line.json():
            for a_line in nav_response_line.json()["lines"]:
                check_stops_of_a_line(params, environnement, coverage, "stop_area", a_line["id"])
                check_stops_of_a_line(params, environnement, coverage, "stop_point", a_line["id"])
        else:
            detail_test_result.append([coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                a_network["id"], "network", "check_stop_point_and_stop_area_name", "network_has_no_line",
                "le réseau {} n'a pas de lignes".format(a_network["name"])
                , "red", ""])
    utils.write_errors_to_file (environnement, coverage, "check_stop_basics", detail_test_result)
    utils.generate_file_summary()
Example #4
    def test_parse_success(self):
        """ test parse function success """
        log.init_log("./log/webpage_parse_test", logging.DEBUG)
        pattern = re.compile(r'.*\.(htm|html)')
        html = r'<a href=page1.html>page 1</a><a href="page2.html">page 2</a>'
        urls = webpage_parse.parse(html, pattern, logging)
        self.assertTrue(len(urls) > 0)
Example #6
def sample_repos(n):
    log.init_log()
    db.connect_db()

    db.sample_projects(n)

    log.close_log()
Example #7
def create_app():
    app = Flask(__name__)
    CORS(app)
    app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI
    # SQLALCHEMY_POOL_SIZE sets the size of SQLAlchemy's connection pool
    app.config["SQLALCHEMY_POOL_SIZE"] = 5
    # SQLALCHEMY_POOL_TIMEOUT sets SQLAlchemy's connection timeout
    app.config["SQLALCHEMY_POOL_TIMEOUT"] = 15
    app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
    app.config["LOG_PATH"] = 'flask_log'
    init_log(app=app)
    # initialize SQLAlchemy; this essentially reads back the configuration above
    db.init_app(app)
    # region: register blueprints
    app.register_blueprint(user)
    app.register_blueprint(order)
    app.register_blueprint(express)
    app.register_blueprint(product)
    app.register_blueprint(territory)
    app.register_blueprint(promotion)
    app.register_blueprint(school)
    app.register_blueprint(data_show)
    app.register_blueprint(product_group)
    app.register_blueprint(school_log)
    # endregion
    return app
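A minimal way to exercise this factory (hypothetical entry point; the blueprints and SQLALCHEMY_DATABASE_URI are assumed to be importable from the surrounding package):

if __name__ == "__main__":
    app = create_app()
    # Flask's built-in server is fine for local testing, not for production
    app.run(host="0.0.0.0", port=5000, debug=True)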
Example #8
    def test_parse_failure(self):
        """ test parse function failure """
        log.init_log("./log/webpage_parse_test", logging.DEBUG)
        pattern = re.compile(r'.*\.(htm|html)')
        html = r'<a href=page1>page 1</a><a href="page2">page 2</a>'
        urls = webpage_parse.parse(html, pattern, logging)
        self.assertTrue(len(urls) == 0)
Example #9
def main():

    log.init_log("./log/spider")
    spider = process.spider_process()
    try:
        opts, args = getopt.getopt(sys.argv[1:], "vhc:")
    except getopt.GetoptError as err:
        logging.error("get option error : %s." % err)
        return
    for o, a in opts:
        print("XXX")
        if o == "-v":
            version()
            return
        elif o == "-h":
            print("帮助!")
            return
        elif o == "-c":
            spider.set_config_by_file(a)

        else:
            print("没有" + o + "参数")
            return
    spider.start_work()
    return
Example #10
def receiver():
    log_path = os.path.dirname(os.path.realpath(__file__)) + '/log/'
    log_name = "mainlog"
    log.init_log(log_path + log_name, logging.DEBUG)

    tcpserver = LogRecordSocketReceiver()

    tcpserver.serve_until_stopped()  
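LogRecordSocketReceiver is the TCP log receiver from the stdlib logging cookbook (its definition is not shown here). A sketch of a client process feeding it, assuming the receiver listens on the default port:

import logging
import logging.handlers

# forward every record to the TCP receiver (DEFAULT_TCP_LOGGING_PORT is 9020)
socket_handler = logging.handlers.SocketHandler(
    'localhost', logging.handlers.DEFAULT_TCP_LOGGING_PORT)
root = logging.getLogger()
root.setLevel(logging.DEBUG)
root.addHandler(socket_handler)
logging.info('hello from a client process')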
Example #11
    def setUp(self):
        """
        setUp - Initialize test case
        """

        log.init_log("./log/test")

        self.spider = Spider('conf/spider.conf')
Example #12
def main():
    log.init_log()
    db.connect_db()

    # Code here
    plots.create_all_histograms()

    log.close_log()
Example #13
def main():
    """Main function of the process
    Args:
        None
    Return:
        None
    """
    parser = argparse.ArgumentParser(description="MiniSpider Options")
    parser.add_argument("-v", action="store_true", dest="version",
                       help="show current version and exit")
    parser.add_argument("-c", dest="conf",
                       help="path of the config file")

    args = parser.parse_args()

    if args.version:
        print "{name} v{version}".format(name=get_proc_name(), version=get_proc_version())
        sys.exit(0)

    log.init_log("./log/mini_spider")
    formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] %(message)s")

    if not (args.conf and os.path.isfile(args.conf)):
        logging.critical("invalid config file")
        sys.exit(1)

    config = ConfigParser.ConfigParser()
    config.read(args.conf)

    try:
        url_seeds_file = config.get("spider", "url_list_file")
        output_dir = config.get("spider", "output_directory")
        crawl_timeout = config.get("spider", "crawl_timeout")
        url_pattern = config.get("spider", "target_url")
        crawl_depth = config.getint("spider", "max_depth")
        thread_num = config.getint("spider", "thread_count")
        crawl_inter = config.getfloat("spider", "crawl_interval")
    except ConfigParser.Error as e:
        logging.critical("Read config error: {error}".format(error=str(e)))
        exit(1)

    work_thread_manager = thread_manager.ThreadManager()

    signal.signal(signal.SIGTERM, h_signal)
    signal.signal(signal.SIGINT, h_signal)

    add_seed_tasks(url_seeds_file)

    worker_spider = spider.Spider(crawl_depth, url_pattern, output_dir, crawl_inter)

    for _ in range(thread_num):
        worker_thread = work_thread.WorkThread(worker_spider)
        work_thread_manager.append_thread(worker_thread)

    work_thread_manager.start()

    # pause the main thread to receive signals
    signal.pause()
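The [spider] section this example reads could look like the following (illustrative values only; the keys match the config.get calls above):

[spider]
url_list_file = ./urls
output_directory = ./output
max_depth = 1
crawl_interval = 1
crawl_timeout = 5
target_url = .*\.(gif|png|jpg|bmp)$
thread_count = 8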
Example #14
def scheduled_job_visit():
    '''
    Visit the blog.
    Collection schedule: runs whenever the minute is a whole multiple of 1.
    '''
    init_log(_log_path='', _log_name='scheduler.log', _filemode='a')
    save_log("_________visit start_________", now_datetime())
    KuiDaiLi().start()
    save_log("_________visit end_________\n", now_datetime())
Example #15
    def setUp(self):
        """
        setUp - Initialize test case
        """

        log.init_log("./log/test")

        fh = urllib.urlopen('http://family.baidu.com/portal/')
        self.page = fh.read()
Example #16
def job_brush_flow():
    '''
    Visit the blog.
    Collection schedule: runs whenever the minute is a whole multiple of 1.
    '''
    init_log(_log_path='', _log_name='brush_flow.log', _filemode='a')
    save_log("_________brush start_________", now_datetime())
    mogu = MoGuRequest()
    mogu.start(_type='mogu')
    save_log("_________brush end_________\n", now_datetime())
Example #17
def main():
    """
    主程序入口
    """

    # 初始化log
    log.init_log("./log/spider")
    #解析命令行参数,获取配置文件信息
    conf_name = arg_parse()
    config_load.parse_conf(conf_name)
Example #18
def main(config_file, task_file, command, status_file):
    """main function

    Args:
        config_file: config location
        task_file: task location, if it is "", get location from config_file
        command: str, if it is "", then use task_file, otherwise use this command
        status_file: status file location

    Return:
        0: success
        1: fail
    """
    # read config 
    config = ConfigParser.ConfigParser()
    # config.read returns the list of files parsed; empty means the read failed
    # (the parser object itself is always truthy)
    if not config.read(config_file):
        logging.fatal("Read config_file failed [%s]" % (config_file))
        return 1
    logging.info("Read config_file successful [%s]" % (config_file))
    
    # init log 
    log_file  = config.get("table_join_checker", "log")
    log_level = eval(config.get("table_join_checker", "level"))
    log.init_log(log_file, log_level)

    # init task_file 
    if command == "" and task_file == "":
        task_file = config.get("table_join_checker", "task_file")
    
    # init status_file 
    if status_file == "":
        status_file = config.get("table_join_checker", "status_file")
    status_code = config.get("table_join_checker", "status_code") 
    # init table_checker
    try:
        table_join_checker = TableJoinCheckerManager(task_file, command, status_file, status_code)
    except TaskManagerInitError as e:
        logging.fatal("Init table join checker failed because of [%s], task_file [%s]\
                      command [%s], status_file [%s]" % (e, task_file, command, status_file))
        return 1
    logging.info("Init table join checker successful, task_file [%s]\
                      command [%s], status_file [%s]" % (task_file, command, status_file))


    # execute every task in table_join_checker
    ret = table_join_checker.excute()
    if ret == 0:
        logging.info("Execute table join checker successful")
    else:
        logging.fatal("Execute table join checker failed")
    
    del table_join_checker
    return ret
Example #19
    def setUp(self):
        """
        init test
        """
        self.output_directory = '../data/output'
        self.max_depth = 1
        self.crawl_interval = 1
        self.crawl_timeout = 1
        self.target_url = '.*\\.(gif|png|jpg|bmp)$'
        self.test_func_tips = '=' * 20 + '%s' + '=' * 20
        # init log
        log.init_log("../log/mini_spider")
Example #20
def check_line_colors(environnement, coverage):
    """Lance la vérification de la présence des couleurs des lignes et du texte associé.
    Dans le cas où les deux couleurs sont présentes, le script vérifie le contraste (accessibilité RGAA).
    """
    log.init_log("", "")
    logger = logging.getLogger("vipere")
    logger.info("On teste le coverage [{}] sur l'environnement [{}] ".format(coverage, environnement))

    params = json.load(open('../params.json'))
    assert (environnement in params['environnements']), "The requested environment does not exist"
    navitia_url = params['environnements'][environnement]['url']
    navitia_api_key = params['environnements'][environnement]['key']


    detail_test_result =  []
    detail_test_result.append(["coverage", "env", "test_datetime", "object_id", "object_type", "test_category", "error", "infos", "error_level", "wkt"])

    appel_nav_networks = requests.get(navitia_url + "coverage/{}/networks?count=1000".format(coverage), headers={'Authorization': navitia_api_key})
    if appel_nav_networks.status_code != 200:
        logger.error(">> the navitia call returned an error")
        return
    for a_network in appel_nav_networks.json()['networks'] :
        appel_nav = requests.get(navitia_url + "coverage/{}/networks/{}/lines?count=1000&depth=0".format(coverage, a_network['id']), headers={'Authorization': navitia_api_key})
        if appel_nav.json()['pagination']['total_result'] > 1000:
            logger.error(">> there are too many lines on network {}; not all of them were tested".format(a_network['name']))
        if "lines" in appel_nav.json():
            for a_line in appel_nav.json()['lines']:
                color = a_line['color']
                text_color = a_line['text_color']
                if not color or not text_color:
                    message = "line {} of network {} is missing its color or its text color".format(a_line['name'], a_network['name'])
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "line colors", "missing config", message, "red", utils.geojson_to_wkt(a_line['geojson'])]
                    detail_test_result.append(result)
                elif not is_valid_color(color) or not is_valid_color(text_color):
                    message = "the color or the text color of line {} of network {} is invalid".format(a_line['name'], a_network['name'])
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "line colors", "invalid config", message, "red", utils.geojson_to_wkt(a_line['geojson'])]
                    detail_test_result.append(result)
                else:
                    contrast = abs(compute_luminosity(text_color) - compute_luminosity(color))
                    if contrast == 0:
                        message = "the color and the text color are identical for line {} of network {}".format(a_line['name'], a_network['name'])
                        result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "line colors", "unreadable line code", message, "red", utils.geojson_to_wkt(a_line['geojson'])]
                        detail_test_result.append(result)
                    elif contrast <= 125:
                        a_better_color = "white"
                        if compute_luminosity(color) >= 128:
                            a_better_color = "black"
                        message = "there is not enough contrast between the color and the text color of line {} of network {}: {} would be less bad".format(a_line['name'], a_network['name'], a_better_color)
                        result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "line colors", "hard-to-read line code", message, "orange", utils.geojson_to_wkt(a_line['geojson'])]
                        detail_test_result.append(result)

    utils.write_errors_to_file (environnement, coverage, "check_line_colors", detail_test_result)
    utils.generate_file_summary()
Example #21
    def __call__(self, parser, namespace, values, option_string=None):
        """
        对-c命令的实际响应
    
        Args:
          self:当前对象
          parser:分析器
          namespace:命名空间
          values:命令参数
          option_string:操作参数
    
        """
        log.init_log("./log/spider")  # 日志保存到./log/my_program.log和./log/my_program.log.wf,按天切割,保留7天
        logging.info("开始加载配置文件:" + values)
        try:
            config_load.ConfigObject.set_config(values)
        except ConfigParser.NoSectionError as ex:
            logging.error("missing config section: " + values)
            sys.exit(1)
        except IOError as ex:
            logging.error("file may not exist: " + values)
            sys.exit(1)
        except ValueError as ex:
            logging.error("illegal value conversion")
            sys.exit(1)

        socket.setdefaulttimeout(float(config_load.ConfigObject.crawl_timeout))
        webpage_parse.UrlLister.set_pattern(config_load.ConfigObject.target_url)
        config = config_load.ConfigObject()

        try:
            max_depth = config.max_depth
            with open(config.url_list_file, "r") as f:
                for line in f:
                    url_dict = {}
                    url_dict["url"] = line
                    url_dict["depth"] = max_depth
                    crawl_thread.CrawlSeedThread.seedqueue_put_url(url_dict)
        except IOError as ex:
            logging.error("找不到该路径地址:" + config.url_list_file)
            logging.error(ex)
            return None

        seed_thread_count = config.thread_count / 2
        thread_count = config.thread_count
        crawl_thread.CrawlSeedThread.set_thread_flag(seed_thread_count)
        for num in range(seed_thread_count):
            t = crawl_thread.CrawlSeedThread(num)
            t.start()
        for num in range(seed_thread_count):
            t = crawl_thread.CrawlThread()
            t.start()
Example #22
    def setUp(self):
        """
        setUp - Initialize test case
        """

        log.init_log("./log/test")

        self.pattern = re.compile(r'.*\.(gif|png|jpg|bmp)$')
        self.spider_config = SpiderConfig('../conf/spider.conf')

        self.crawler = CrawlUrl(1000, 0, self.spider_config,
                                'http://family.baidu.com/portal/',
                                self.pattern)
Example #23
def setup_routes(application):
    log.init_log(os.getcwd() + "//log//log")
    import_all()

    for handler in global_register_handler:
        # handler_instance = handler()
        logging.info("Path %s add Handler %s" % (handler.path, str(handler)))
        application.router.add_route("*", handler.path, handler)

    for thread in global_register_threads:
        thread_instance = thread()
        thread_instance.start()
        logging.info("Thread: %s start" % (str(thread), ))
Example #24
    def run(self):
        """go to war"""
        print "Spider start"

        log.init_log(self.conf['log_path'])
        contr = controller.Controller(self.conf)
        
        start_urls = []
        with open(self.urls, 'r') as url_handler:
            start_urls = url_handler.readlines()

        print start_urls
        contr.run(start_urls)
        
        print "Spider done..."
Example #25
def download_repos(min_stars, max_stars):
    log.init_log()
    db.connect_db()

    log.log("Getting repos from github api")
    repos = github.get_repos(min_stars, max_stars)

    log.log("Saving repos in database")
    # Save repos in db
    for repo in repos:
        db.insert_project(repo)

    log.log("Repos saved in database")

    log.close_log()
Example #26
def main():
    """
    main func
    """
    VERSION = "1.0"
    log.init_log("logs/mini_spider")
    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(prog='mini spider')
    parser.add_argument("-c", "--conf", help="config file path", required=True)
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%(prog)s ' + VERSION)
    args = parser.parse_args()

    # init global variables
    try:
        ret_conf = config_load.conf_parser(args.conf)
    except UnboundLocalError as msg:
        logging.error("Read conf fail. Message: %s" % msg)
        sys.exit(-1)
    else:
        if ret_conf is False:
            sys.exit(0)

    lock = threading.Lock()
    url_queue = Queue.Queue()
    crawled_urls = set()

    with open(global_value.URL_LIST_FILE) as fp:
        for start_point in fp:
            if not start_point.startswith('http'):
                continue
            start_url = url_item.UrlItem(start_point.strip('/\n\r'))
            url_queue.put(start_url)

    threads = []
    """start thread"""
    for i in xrange(global_value.THREAD_COUNT):
        spider_thread = SpiderThread(url_queue, lock, crawled_urls)
        threads.append(spider_thread)
        spider_thread.start()
        logging.info("staring spider thread...")
    """stop thread"""
    for thread in threads:
        thread.join()
    logging.info("spider work is done!")
Example #27
def main():
    """
    main function to call trans_kw
    """
    log.init_log("./log/my_program")
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--readfile', action='store', dest='FILEPATH', default='.', help='set file path')
    args = parser.parse_args()
    file_path = args.FILEPATH
    trans_list = read_from_file(file_path)
    if trans_list is None:
        return 
    for trans in trans_list:
        print 'word:', trans
        trans_kw(trans)
        print '========================='
Example #28
def main():
    init_log()

    # load config from settings.py
    load_config()

    # init ssh_key and tag object
    ssh_key = init_ssh_key()
    tag = init_tag()

    # destroy all droplet if necessary
    destroy_all_droplet()

    # make sure all robot's droplet are destroyed
    time.sleep(3)
    manager = digitalocean.Manager(token=config.token)
    assert len(manager.get_all_droplets(tag_name=config.default_tag_name)) == 0

    # create a new droplet
    droplet = create_new_droplet(ssh_key)

    # add the default tag to the droplet
    tag.add_droplets(droplet)

    # get the necessary information to access machine
    ip = get_machine_ip()

    # execute commands on the remote host
    ssh_connect(ip, 'root', config.public_key_file_path)
    logging.info('update system.....   it takes some time')
    ssh_command('yum install wget git -y')
    ssh_command('wget https://raw.githubusercontent.com/zMingGit/general/master/tcp_nanqinlang-1.3.2.sh')
    ssh_command('chmod u+x tcp_nanqinlang-1.3.2.sh')
    ssh_command('./tcp_nanqinlang-1.3.2.sh install')
    ssh_command('reboot')
    ssh_connect(ip, 'root', config.public_key_file_path)
    logging.info('install TCP congestion algorithm.....  it takes some time')
    ssh_command('./tcp_nanqinlang-1.3.2.sh start')

    logging.info('install SSR...')
    ssh_command('yum install -y epel-release')
    ssh_command('yum install -y libsodium')
    ssh_command('git clone https://github.com/shadowsocksrr/shadowsocksr.git')
    ssh_command("echo '%s' > shadowsocksr/config.json" % config.ssr_config)
    ssh_command('python shadowsocksr/shadowsocks/server.py -c shadowsocksr/config.json -d start')
    logging.info('successfully installed')
Example #29
    def __init__(self, *, maxlen, log_level):
        self.queue = deque(maxlen=maxlen)
        self.log = init_log(log_name="Statistic", log_level=log_level)
        self.__min = None
        self.__max = None
        self.__avg = None
        self.__sum = 0
        self.__len = 0
Example #30
def create_app() -> Flask:
    app = Flask(__name__)
    app.config.from_object(Config)

    import error
    error.init_error(app)

    import log
    log.init_log(app)

    import routes
    routes.init_routes(app)

    if app.config['ENV'] == 'development':
        print(app.url_map)

    return app
Example #31
    def __init__(self, host, port, user, password, database):
        self.host = host
        self.port = int(port)
        self.user = user
        self.password = password
        self.database = database
        self.charset = "utf8"
        log.init_log("log/muti")

        self.conn = MySQLdb.connect(host=self.host,
                                    port=self.port,
                                    user=self.user,
                                    passwd=self.password,
                                    db=self.database,
                                    charset=self.charset
                                    )
        self.conn.autocommit(1)
        self.cursor = self.conn.cursor()
Example #32
    def __init__(self):
        self.url_list_file = ''
        self.output_directory = ''
        self.max_depth = ''
        self.crawl_interval = ''
        self.target_url = ''
        self.thread_count = ''
        self.crawl_timeout = ''
        # logs go to ./log/mini_spider_log.log and ./log/mini_spider_log.log.wf, rotated daily, kept for 7 days
        self.logger = log.init_log("./log/mini_spider_log")
Example #33
def check_lines(environnement, coverage):
    """Lance la vérification de la présence des codes et noms des lignes et qu'ils ne sont pas identiques.
    """
    log.init_log("", "")
    logger = logging.getLogger("vipere")
    logger.info("Vérification des lignes pour le coverage [{}] et sur l'environnement [{}]".format(coverage, environnement))

    params = json.load(open('../params.json'))
    assert (environnement in params['environnements']), "The requested environment does not exist"
    navitia_url = params['environnements'][environnement]['url']
    navitia_api_key = params['environnements'][environnement]['key']

    # to avoid huge calls, make one call per network
    nav_response_network = requests.get(navitia_url + "coverage/{}/networks?count=1000".format(coverage), headers={'Authorization': navitia_api_key})
    if nav_response_network.status_code != 200:
        logger.error(">> the navitia call returned an error")
        return
    errors = []
    errors.append(["coverage", "env", "test_datetime", "object_id", "object_type", "test_category", "error", "infos", "error_level", "wkt"])
    for a_network in nav_response_network.json()['networks'] :
        nav_response_line = requests.get(navitia_url + "coverage/{}/networks/{}/lines/?count=1000".format(coverage, a_network["id"]),
            headers={'Authorization': navitia_api_key})
        if "lines" in nav_response_line.json():
            for a_line in nav_response_line.json()['lines']:
                if a_line["code"].strip() == "":
                    errors.append([coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                        a_line["id"], "line", "check_line_name_and_code", "no_line_code", '"' + a_line["name"].replace('"', '""')+'"', "orange",
                        utils.geojson_to_wkt(a_line['geojson'])])
                if a_line["name"].strip() == "":
                    errors.append([coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                        a_line["id"], "line", "check_line_name_and_code", "no_line_name", a_line["code"], "red",
                        utils.geojson_to_wkt(a_line['geojson'])])
                if a_line["name"].strip() and (a_line["name"].strip() == a_line["code"].strip()):
                    errors.append([coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                        a_line["id"], "line", "check_line_name_and_code", "line_code_and_name_identical", a_line["code"], "orange",
                        utils.geojson_to_wkt(a_line['geojson'])])
        else:
            errors.append([coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                a_network["id"], "network", "check_line_name_and_code", "network_has_no_line",
                "le réseau {} n'a pas de lignes".format(a_network["name"])
                , "red", ""])
    utils.write_errors_to_file (environnement, coverage, "check_line_name_and_code", errors)
    utils.generate_file_summary()
Example #34
def launch_tests(environnement):
    log.init_log("", "")
    logger = logging.getLogger("vipere")

    params = json.load(open('../params.json'))
    assert (environnement in params['environnements']), "The requested environment does not exist"

    for script in params["tests"]:
        for coverage in params["tests"][script]:
            script_params = ["python3", script+".py", environnement, coverage]
            if type(params["tests"][script]) is dict:
                additional_params = params["tests"][script][coverage]
                logger.debug(str(additional_params))
                if type(additional_params) is str:
                    script_params.append(additional_params)
                else:
                    script_params.extend(additional_params)
            logger.debug("Lancement de : " + str(script_params))
            subprocess.call(script_params)
Example #35
    def __init__(self, queue, conf_dict):
        threading.Thread.__init__(self)
        self.queue = queue
        self.output_directory = conf_dict['output_directory']
        self.max_depth = conf_dict['max_depth']
        self.crawl_interval = conf_dict['crawl_interval']
        self.crawl_timeout = conf_dict['crawl_timeout']
        self.target_url = conf_dict['target_url']
        self.thread_stop = False
        self.logger = log.init_log("./log/mini_spider_log")
Example #36
def predict():
    global args, best_prec1, logger
    args = parser.parse_args()
    data_set = CarDataSet([IMG_TEST_PATH], is_predict=True)
    logger = log.init_log(logfile="./test_log.txt", log_name="test data")
    logger.info("Loading dataset...")

    test_loader = DataLoader(
        data_set,
        shuffle=False,
        drop_last=True,
        # sampler=[6],
        num_workers=args.workers)
    num_channel = 3
    width, high = (data_set.high, data_set.width)
    model = CarUNet(in_shape=(num_channel, width, high), num_classes=1)
    model.cuda()
    checkpoint = torch.load("model_best.pth.tar")
    args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    print best_prec1
    import csv
    # import matplotlib.pyplot as plt
    csv_file = open("submission_v2.csv", "wb")
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(["img", "rle_mask"])
    for it, (img_tensor, name) in enumerate(test_loader):
        if it % 1000 == 0:
            print it, it / 100000.0 * 100, "%"
        # if name[0] != "0d1a9caf4350_05.jpg":
        #     continue
        # print name
        # img_out = torch.torch.from_numpy(np.zeros(shape=(1280, 1918)))
        image_ = Variable(img_tensor.cuda(), volatile=True)
        logits = model(image_)
        probs = F.sigmoid(logits)
        masks = (probs > 0.5).float()
        img_out = masks.cpu()
        img_out = img_out.data.numpy()
        img_out = img_out[0][0]
        img_out = Image.fromarray(img_out)
        img_out = img_out.resize((1918, 1280), 0)
        img_out_ = np.asarray(img_out)

        # run_length_encode(img_out_)

        # plt.figure()
        # plt.imshow(img_out_, interpolation='None')
        # plt.show()
        # from scripts import gif_loader
        # mask = gif_loader("/home/wuliang/wuliang/CIMC/car_mask/dataset/train_masks/0cdf5b5d0ce1_01_mask.gif")
        rl = run_length_encode(img_out_)
        writer.writerow([name[0], rl])
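run_length_encode is not defined in this example. A common sketch for this kind of binary-mask submission encoding (assuming mask is a 2-D numpy array of zeros and ones) is:

import numpy as np

def run_length_encode(mask):
    """Encode a binary mask as 'start length start length ...' (1-indexed)."""
    pixels = mask.flatten()
    runs = []
    prev = -2
    for idx in np.where(pixels > 0)[0]:
        if idx > prev + 1:
            runs.extend((idx + 1, 0))  # a new run starts here
        runs[-1] += 1                  # extend the current run
        prev = idx
    return ' '.join(str(x) for x in runs)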
Example #37
def main():
    """
    Main method to run mini spider
    """
    # get input params
    args = parm_parser.get_args()
    # init log config
    log.init_log('./log/mini_spider')
    if args:
        # read config file spider.conf
        conf_params = parm_parser.set_config_by_file(args.conf)
        # use config set up spider initial params
        spider = SpiderWorker(conf_params)
        # init result_path, make it complete
        spider.set_path()
        # init url queue
        spider.set_url_queue()
        # start to crawl url
        spider.start_crawl_work()

    return
Example #38
def main():
    """1.初始化日志
       2.解析命令行参数获取配置文件路径
       3.创建Spider对象并初始化
       4.开始抓取
    """
    log.init_log('./log/spider')
    config_path = parse_commandline()

    if config_path is None:
        print_usage()
    else:
        #create a spider and start it
        _spider = spider.Spider()
        if _spider.initialize(config_path):
            _spider.start()
            _spider.print_info()
            logging.info("All thread finished")
        else:
            logging.error("Initialize spider failed")
            return False
Example #39
def init_log(log_path):
    """初始化日志类,打印日志

    Args:
    string log_path:日志路径

    Returns:
    none 
    """
    global logging
    logging = log.init_log(log_path)
    return logging
Example #40
def log_test(log_path, n_iter):
    """
    log_test() tests the log function
    """
    init_log(log_path)

    # write four batches of n_iter timestamped random entries
    for _ in range(4):
        test_dict = {}
        for _ in range(n_iter):
            now = datetime.datetime.now()
            current_time = now.strftime("%d-%m-%Y %H:%M:%S:%f")
            random_data = random.randint(0, 10)
            test_dict[current_time] = random_data
        log(log_path, test_dict, MAX_SIZE)

    loaded_data = load_data(log_path)
    assert len(loaded_data) == 4 * n_iter
Example #41
def create_app(config_file="server.ini"):

    #Check ini file for quick configuration
    if not os.path.exists(config_file):
        print "error: can't find ini file %s" % config_file
        usage(2)

    # Load settings from ini file
    log_file, db_path, random_id, trace_path, option_path = load_inifile(config_file)
    log.init_log(log_file)
    interface.init(db_path, random_id, trace_path, option_path)
    
    # Stop Werkzeug logging
    
    wz_log = logging.getLogger('werkzeug')
    wz_log.disabled = True

    application = Flask(__name__)
    application.register_blueprint(aa_bp)
    application.register_blueprint(interface.if_blueprint)
    
    return application
Example #42
def main():
    """
    main - Mini Spider Main function

    Return:
        True  - good
        False - error
    """

    status = True

    start_time = datetime.datetime.now()

    # Log will go to ./log/mini_spider.log and ./log/mini_spider.log.wf 
    # Separated by day and keep for 7 days
    log.init_log("./log/mini_spider")
    logging.info('Mini Spider crawling is starting ...')

    config = parse_opts()
    mini_spider = MiniSpider(config)

    try:
        mini_spider.run()
    except ThreadException as e:
        logging.error(e)
        status = False

    # Set network connection timeout for urllib
    socket.setdefaulttimeout(mini_spider.spider_config.crawl_timeout)

    logging.info('Mini Spider crawling is done, please check the result under output/ directory.')
    mini_spider.summary()

    end_time = datetime.datetime.now()
    logging.info('Total time used:       ' + str((end_time - start_time).seconds) + ' seconds')
    logging.info('Exit main thread.')

    return status
Example #43
def main():
    """
    spider main function

    usage:  python mini_spider [options]
    options:
        -c CONFIG_FILE_PATH, --config_file_path CONFIG_FILE_PATH the spider config file path
        -h, --help            show this help message and exit
        -v, --version         show spider version and exit
    """
    # init log
    log.init_log("../log/mini_spider")
    # parse args
    parser = argparse.ArgumentParser(description="mini directional spider")
    parser.add_argument("-v", "--version", action="store_true", help="show spider version and exit")
    parser.add_argument("-c", "--config_file_path", help="config file path")
    args = parser.parse_args()
    config_file_path = args.config_file_path
    if args.version:
        print "mini spider 0.1"
        return 0

    if config_file_path is None:
        usage = "usage: python mini_spider.py -c spider_conf_file_path"
        logging.info("the config path cannot be empty, " + usage)
        return -1

    # read conf
    ret, config_map = config_load.load_config(config_file_path)
    if ret != 0:
        return ret

    # start several spiders running with multiple threads
    urls_queue = Queue.Queue()
    crawled_urls_list = []
    code, urls_list = seedfile_load.get_urls(config_map.get('url_list_file', ''))
    if code != 0:
        return code
    if not urls_list:
        logging.error('the seed url list is empty.')
        return -1
    for url in urls_list:
        url_item = {'url': url, 'depth': 0}
        urls_queue.put(url_item)
    thread_count = config_map.get('thread_count', 1)
    thread_list = []
    __init_output_dir(config_map.get('output_directory', '.'))
    for i in xrange(thread_count):
        spider_thread = spider.Spider(urls_queue,
                                      config_map.get('output_directory', '.'),
                                      config_map.get('max_depth', 1),
                                      config_map.get('crawl_interval', 1),
                                      config_map.get('crawl_timeout', 1),
                                      config_map.get('target_url', '.*\\.(gif|png|jpg|bmp)$'),
                                      crawled_urls_list,
                                      thread_count)
        thread_list.append(spider_thread)
        spider_thread.start()

    # for thread_item in thread_list:
    #     thread_item.join()
    tips = 'Finished crawling all pages'
    logging.info(tips)
    print tips
    return 0
Example #44
            usage()
            logging.error('args config_file not fill')
            sys.exit(PARSE_CMDLINE_ARGUMENTS_ERROR)

    except getopt.GetoptError:
        print "args error!"
        usage()
        logging.error('args errors')
        sys.exit(PARSE_CMDLINE_ARGUMENTS_ERROR)

    return config_file


if "__main__" == __name__:
    #config log info
    log.init_log("./log/spider", logging.DEBUG)

    #get command line args
    config_file = parseCmdLine(sys.argv)
    if config_file is None:
        sys.exit(PARSE_CMDLINE_ARGUMENTS_ERROR)
    
    #load config file
    logging.debug('load config file...')
    config_para_code, config_para_item = config_load.load_config(config_file)

    #check load config file ok?
    if config_para_code != 0:
        #error
        print 'load config file error', config_para_item
        logging.error('load config file ' + str(config_para_code) + ', ' + config_para_item)
Example #45
TEMPLATE_DIRS = (
    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
    # Always use forward slashes, even on Windows.
    # Don't forget to use absolute paths, not relative paths.
)

INSTALLED_APPS = (
    # 'django.contrib.auth',
    # 'django.contrib.contenttypes',
    # 'django.contrib.sessions',
    # 'django.contrib.sites',
    # 'django.contrib.messages',
    # 'django.contrib.staticfiles',
    'upload',
    'download',
    'plugins',
    'search',
    # Uncomment the next line to enable the admin:
    # 'django.contrib.admin',
    # Uncomment the next line to enable admin documentation:
    # 'django.contrib.admindocs',
)

# logging configuration
LOGGING_CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'logging.cfg')
init_log(LOGGING_CONFIG_FILE)
logging.getLogger('django.db.backends').setLevel(logging.ERROR)

# close tmp file
FILE_UPLOAD_MAX_MEMORY_SIZE = globalconf.FILE_UPLOAD_MAX_MEMORY_SIZE
Example #46
            ] + [
                (
                    self.items[name],
                    self.items[name],
                    io.open(filename, 'rb').read(),
                    self.mimetypes[name]
                ) for name, filename in self.filenames.items()
            ]


### command line interface {{{1

if __name__ == '__main__':

    from log import lo, init_log, DEBUG, INFO
    init_log(lo)

    import argparse

    parser = argparse.ArgumentParser(description=
            'scriptable communication with ODK Aggregate server v' + VERSION)

    parser.add_argument('--debug', '-d', help='show debug output', action='store_true')
    parser.add_argument('--username', '-u', help='username for login', default=None)
    parser.add_argument('--password', '-p', help='password for login', default=None)

    parser.add_argument('--server', '-s', required=True,
            help='complete URL of server in the form ' +
            'http[s]://server.com[:port]/ODKAggregate (assumes port ' +
            '80 for http and 443 for https if not specified)')
Example #47
        logging.info('All %s-level crawling jobs for %s are completed' %
                     (self.max_depth, start_url))
        return 0

    def bfs_crawl_all(self):
        """ crawl all urls in BFS

        crawl urls from the list one by one
        """
        try:
            with open(self.url_list_file, "r") as fd:
                start_url_list = fd.readlines()
                for start_url in start_url_list:
                    logging.info("start to crawl %s" % start_url)
                    self.bfs_crawl(start_url.strip())
        except IOError as e:
            logging.error('Fail to read urls from %s exception: %s' % (self.url_list_file, e))

if __name__ == "__main__":
    log.init_log("./log/mini_spider")
    cfg_file = config_load.opt_init()
    if cfg_file is None:
        logging.error('No configure file specified in options, exit..')
        sys.exit(0)
    cfg_dict = config_load.config_init(cfg_file)
    if cfg_dict is None:
        logging.error('Configure file content not valid, exit..')
        sys.exit(1)
    mini_spider = MiniSpider(cfg_dict)
    mini_spider.bfs_crawl_all()
Example #48
def main():
    log.init_log('./log/block')
    block_monitor("jx")
    unblock_monitor("jx")
Example #49
    @File   : 'wsgi'
    @Author : 'jonah'
    @Date   : '2015-09-12 14:36'
    @About  : 'wsgi server'
"""

import leancloud
import wsgiref.handlers
import wsgiref.simple_server
from app import app
from configs import config
import log


port = config.PORT
appid = config.APP_ID
master_key = config.MASTER_KEY
leancloud.init(appid, master_key=master_key)

# init application
application = leancloud.Engine(app)
# init log
log.init_log()


if __name__ == '__main__':
    server = wsgiref.simple_server.make_server('', port, application)
    sa = server.socket.getsockname()
    print "Serving HTTP on", sa[0], "port", sa[1], "..."
    server.serve_forever()
Example #50
class FaviconHandler(tornado.web.RequestHandler):
  def get(self):
    self.redirect("http://www.baidu.com/favicon.ico", True)

class ClearMQHandler(tornado.web.RequestHandler):
  def get(self):
    try:
      count = 0
      server = redis.Redis("localhost")
      while server.llen("MQ_SEND_TO_KINDLE"):
        server.brpop("MQ_SEND_TO_KINDLE")
        count += 1
      self.write("clear %s items" % count)
    except:
      pass

application = tornado.web.Application([
  (r"/send_to_kindle", SendToKindleHandler),
  (r"/clear_mq", ClearMQHandler),
  (r"/favicon\.ico", FaviconHandler)
])

if __name__ == "__main__":
  from log import init_log
  init_log("logs/access.log")

  application.listen(8965)
  tornado.ioloop.IOLoop.instance().start()

Example #51
"""
File: run_main.py
Author: mijianhong([email protected])
Date: 2016/07/27 20:16:15
"""
import logging
import argparse

import log
import goo_translate

if __name__ == '__main__':
    """
    主程序入口
    """
    log.init_log('./log/mini_spider')   
    logging.info('%-35s' % ' * Google-translate is starting ... ')
    parser = argparse.ArgumentParser(description='This is a google-translation program!')
    parser.add_argument('-v', 
                        '--version',
                        action='version', 
                        version='%(prog)s 1.0.0')

    parser.add_argument('-f',
                        '--kw_file',
                        action='store',
                        dest='FILE_PATH',
                        default='word.dict',
                        help='please set file_path ... ')
    parser.add_argument('-n',
                        '--thread_num',
Example #52
 * *****************************************************************************/
 '''
#! /usr/bin/python
# -*- coding:utf-8 -*-

import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
import os, sys
sys.path.append("..")
from conf.conf import http_config as http_config

from tornado.options import define, options
define("port", default=http_config["port"], help="run on the given port", type=int)

from application import Application
import logging 
from log import init_log

if __name__ == '__main__':
    init_log()
    foxea_logger = logging.getLogger('foxea')  
    foxea_logger.info('foxea torando web start') 

    tornado.options.parse_command_line()
    application = Application()
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
Example #53
import os
import sys
import time
import log
import ConfigParser
import spider

reload(sys)
sys.setdefaultencoding('utf8')

if __name__ == "__main__":
    cf = ConfigParser.ConfigParser()
    #cf.read("./conf/nice.conf")
    cf.read(sys.argv[1])
    # initialize logging
    log.init_log(cf.get("nice", "BASE") + cf.get("nice", "LOG_FILE"), log.logging.INFO)
    log.logging.info("read conf ok [%s]" % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
   
    crawled_avatar_conf = sys.argv[2]
    crawled_pic_conf = sys.argv[3]
    Spider = spider.Spider(cf)
    # load the seed users and the avatars/pictures already crawled
    Spider.prepare(crawled_avatar_conf, crawled_pic_conf)

    # crawl
    time_now = int(time.time())
    log.logging.info("spider nice job start [%s]", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time_now)))
    Spider.work(time_now)

    # save each user's crawl status
    Spider.finish()
Example #54
def test_all_bss_for_realtime_on_stands(environnement, coverage, *insee_filter):
    log.init_log("", "")
    logger = logging.getLogger("vipere")
    logger.info("Test du temps réel de la dispo VLS pour le coverage [{}] et sur l'environnement [{}]".format(coverage, environnement))

    params = json.load(open('../params.json'))
    assert (environnement in params['environnements']), "The requested environment does not exist"
    navitia_url = params['environnements'][environnement]['url']
    navitia_api_key = params['environnements'][environnement]['key']

    total_nb_tests = 0
    test_result = {}
    test_result['POI out of scope'] = 0
    test_result['POI not configured'] = 0
    test_result['POI misconfigured'] = 0
    test_result['POI configured but broken'] = 0
    test_result['POI ok'] = 0
    test_result['not tested'] = 0

    detail_test_result =  []
    detail_test_result.append(["coverage", "env", "test_datetime", "object_id", "object_type", "test_category", "error", "infos", "error_level", "wkt"])

    appel_nav_url = navitia_url + "coverage/{}/poi_types/poi_type:amenity:bicycle_rental/pois?count=900".format(coverage)
    appel_nav = requests.get(appel_nav_url, headers={'Authorization': navitia_api_key})
    # TODO: handle pagination more carefully; large coverages can have more POIs

    if appel_nav.status_code != 200:
        logger.error(">> the navitia call returned an error: " + appel_nav_url)
        return

    if appel_nav.json()['pagination']['total_result'] > 200:
        test_result['not tested'] += appel_nav.json()['pagination']['total_result'] - 900
        message = "There are too many POIs; not all of them were tested"
        result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), coverage, "coverage", "bike-share real time", "script update needed", message, "red", ""]
        detail_test_result.append(result)
        logger.error(message)

    pois = appel_nav.json()['pois']
    for a_poi in pois:
        if (total_nb_tests % 100) == 0:
            logger.info("Checking bike-share station {} of {}".format(str(total_nb_tests), str(len(pois))))
        total_nb_tests += 1
        if insee_filter:
            if a_poi['administrative_regions'][0]['insee'] not in insee_filter:
                test_result['POI out of scope'] += 1
                continue

        if len(a_poi['properties']) == 0:
            result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_poi['id'], "poi", "bike-share real time", "missing config", "No properties on this bike-share station", "orange", "POINT({} {})".format(a_poi['coord']['lon'], a_poi['coord']['lat'])]
            detail_test_result.append(result)
            test_result['POI not configured'] += 1
        else:
            if not "operator" in a_poi['properties'] or not "network" in a_poi['properties'] or not "ref" in a_poi['properties']:
                result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_poi['id'], "poi", "bike-share real time", "missing config", "operator, network or ref is missing on this bike-share station", "orange", "POINT({} {})".format(a_poi['coord']['lon'], a_poi['coord']['lat'])]
                detail_test_result.append(result)
                test_result['POI misconfigured'] += 1
                continue
            else:
                if not "stands" in a_poi:
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_poi['id'], "poi", "bike-share real time", "real time broken", "the config looks ok but it does not work", "red", "POINT({} {})".format(a_poi['coord']['lon'], a_poi['coord']['lat'])]
                    detail_test_result.append(result)
                    test_result['POI configured but broken'] += 1
                elif a_poi['stands'] is None:
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_poi['id'], "poi", "bike-share real time", "real time broken", "the config looks ok but the third-party webservice returns garbage", "red", "POINT({} {})".format(a_poi['coord']['lon'], a_poi['coord']['lat'])]
                    detail_test_result.append(result)
                    test_result['POI configured but broken'] += 1
                else:
                    test_result['POI ok'] += 1

    logger.info("Résultat des tests : ")
    logger.info(">> {} cas de tests".format(total_nb_tests))
    logger.info(">> {} VLS pas paramétrés du tout".format(test_result['POI non paramétré']))
    logger.info(">> {} VLS avec des paramètres manquants".format(test_result["POI mal paramétré"]))
    logger.info(">> {} VLS en erreur quoique bien paramétrés".format(test_result['POI paramétré mais ko']))
    logger.info(">> {} VLS ignorés car hors périmètre".format(test_result['POI hors périmètre']))
    logger.info(">> {} VLS qui fonctionnent".format(test_result['POI ok']))

    utils.write_errors_to_file (environnement, coverage, "check_realtime_on_bss", detail_test_result)
    utils.generate_file_summary()
Example #55
def main():
  server = redis.Redis("localhost")
  while True:
    try:
      key, _ = server.brpop("MQ_SEND_TO_KINDLE")
      item = json.loads(_)
      email, title, url, version = item[0], item[1], item[2], item[3]
      logging.debug("email = [%s], title = [%s], url = [%s], version = [%s]",
          email, title, url, version)
      attachment = fetch_as_attachment(title, url, version)
      if attachment:
        send_mail(email, title, attachment)
        os.remove(attachment)
    except Exception as e:
      logging.error(e)
      import time
      time.sleep(1)

if __name__ == '__main__':
  from log import init_log
  init_log("logs/worker.log")
  main()
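The producer side of this queue is not shown. A matching sketch (hypothetical values; the JSON layout mirrors the json.loads unpacking above) would enqueue work like this:

import json
import redis

server = redis.Redis("localhost")
# each item is a JSON-encoded [email, title, url, version] list,
# which is exactly what the worker's json.loads expects
server.lpush("MQ_SEND_TO_KINDLE", json.dumps(
    ["user@example.com", "Some article", "http://example.com/article", "v1"]))

Since the worker pops with brpop, pushing with lpush gives first-in-first-out ordering.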
Example #56
def main(config_file, task_file, command_file):
    """main function

    Args:
        config_file: config location
        task_file: task location, if it is "", get location from config_file
        command_file: command location if it is "", get from config_file

    Return:
        0: success
        1: fail
    """
    # read config 
    config = ConfigParser.ConfigParser()
    # config.read returns the list of files parsed; empty means the read failed
    # (the parser object itself is always truthy)
    if not config.read(config_file):
        logging.fatal("Read config_file failed [%s]" % (config_file))
        return 1
    logging.info("Read config_file successful [%s]" % (config_file))
    
    # init log 
    try:
        log_file  = config.get("task_generator", "log")
        log_level = eval(config.get("task_generator", "level"))
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
        logging.fatal("%s" % (e))
        return 1
    log.init_log(log_file, log_level)

    # init task_file 
    if task_file == "":
        try:
            task_file = config.get("task_generator", "task_file")
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
            logging.fatal("%s" % (e))
            return 1
    logging.info("Set task file [%s] successful" % (task_file)) 
    
    # init command_file 
    if command_file == "":
        try:
            command_file = config.get("task_generator", "command_file")
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
            logging.fatal("%s" % (e))
            return 1
    logging.info("Set command file [%s] successful" % (command_file)) 
    
    # init output task_file
    output_task_files = []
    for section in ["data_sampler", "table_checker", "table_join_checker"]:
        try:
            filename = config.get(section, "task_file")
        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
            logging.fatal("%s" % (e))
            return 1
        output_task_files.append(filename)
    logging.info("Get output task file [%s] successful" % (str(output_task_files)))

    # read task_file and handler information 
    commands_list = get_commands(task_file)
    #print commands_list
    if commands_list is None:
        logging.fatal("Get commands from [%s] failed" % (task_file))
        return 1
    logging.info("Get commands from [%s] successful" % (task_file))
    
    # write the commands and generate one task file per tag
    tags = ["data_sampler", "table_checker", "table_join_checker"]
    for output_task_file, commands, tag in zip(output_task_files, commands_list, tags):
        ret = write_task(commands, output_task_file, tag)
        if ret != 0:
            logging.fatal("Write [%s] task [%s] failed" % (tag, output_task_file))
            return 1
        logging.info("Wrote [%s] task [%s] successfully" % (tag, output_task_file))
        ret = write_commands(commands, command_file, tag, config_file)
        if ret != 0:
            logging.fatal("Write [%s] commands [%s] failed" % (tag, command_file))
            return 1
        logging.info("Wrote [%s] commands [%s] successfully" % (tag, command_file))

    return 0
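
# A sketch of the INI layout main() expects, reconstructed from the
# config.get() calls above; the section and option names come from the code,
# while the paths and the level value are illustrative assumptions.
#
#   [task_generator]
#   log = ./log/task_generator.log
#   level = logging.INFO
#   task_file = ./conf/task.conf
#   command_file = ./conf/command.conf
#
#   [data_sampler]
#   task_file = ./output/data_sampler.task
#
#   [table_checker]
#   task_file = ./output/table_checker.task
#
#   [table_join_checker]
#   task_file = ./output/table_join_checker.task
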
def test_network_for_realtime_on_stop_schedule(environnement, coverage, *networks):
    log.init_log("", "")
    logger = logging.getLogger("vipere")
    logger.info("Checking lines for coverage [{}] on environment [{}]".format(coverage, environnement))

    if len(networks) == 0:
        logger.error("At least one network identifier must be passed as a parameter")
        exit()
    params = json.load(open('../params.json'))
    assert (environnement in params['environnements']), "The requested environment does not exist"
    navitia_url = params['environnements'][environnement]['url']
    navitia_api_key = params['environnements'][environnement]['key']

    total_nb_tests = 0
    test_result = {
        'line not configured': 0,
        'no schedules at all': 0,
        "no schedules, but that is expected": 0,
        'theoretical schedules only': 0,
        'OK': 0,
        'not tested': 0,
    }

    detail_test_result = []
    detail_test_result.append(["coverage", "env", "test_datetime", "object_id", "object_type", "test_category", "error", "infos", "error_level", "wkt"])

    for network in networks:
        # a first call with count=0 only reads the total number of lines
        appel_nav = requests.get(navitia_url + "coverage/{}/networks/{}/lines?count=0".format(coverage, network), headers={'Authorization': navitia_api_key})
        nb_result = appel_nav.json()['pagination']['total_result']

        appel_nav = requests.get(navitia_url + "coverage/{}/networks/{}/lines?count={}".format(coverage, network, nb_result), headers={'Authorization': navitia_api_key})
        lines = appel_nav.json()['lines']
        for a_line in lines:
            logger.info("Processing network {} and line {}".format(network, a_line["id"]))
            if "properties" not in a_line:
                message = 'no realtime configuration for line {} ({})'.format(a_line['name'], a_line['id'])
                result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "realtime proxy mode", "missing config", message, "green", utils.geojson_to_wkt(a_line['geojson'])]
                detail_test_result.append(result)
                test_result['line not configured'] += 1
            else:
                keys = [prop['name'] for prop in a_line['properties']]
                if "realtime_system" not in keys:
                    test_result['line not configured'] += 1
                    message = 'no realtime configuration for line {} ({})'.format(a_line['name'], a_line['id'])
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "realtime proxy mode", "missing config", message, "green", utils.geojson_to_wkt(a_line['geojson'])]
                    detail_test_result.append(result)
                    continue

                # fetch the stop_points of the line (the call is capped at 200)
                appel_nav = requests.get(navitia_url + "coverage/{}/networks/{}/lines/{}/stop_points?count=200".format(coverage, network, a_line['id']), headers={'Authorization': navitia_api_key})
                nb_stop_points = appel_nav.json()['pagination']['total_result']
                if nb_stop_points > 200:
                    test_result['not tested'] += nb_stop_points - 200
                    message = "Line {} ({}) was not fully tested because it has too many stops".format(a_line['name'], a_line['id'])
                    result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'), a_line['id'], "line", "realtime proxy mode", "script update needed", message, "red", utils.geojson_to_wkt(a_line['geojson'])]
                    detail_test_result.append(result)
                    logger.error(message)

                # request the schedule at each stop of the line and check that realtime data comes back
                for a_stop_point in appel_nav.json()['stop_points']:
                    # use a separate variable so the appel_nav response being iterated is not overwritten
                    schedule_call = requests.get(navitia_url + "coverage/{}/networks/{}/lines/{}/stop_points/{}/stop_schedules?items_per_schedule=1".format(coverage, network, a_line['id'], a_stop_point['id']), headers={'Authorization': navitia_api_key})
                    for a_schedule in schedule_call.json()['stop_schedules']:
                        wkt = "POINT({} {})".format(a_schedule['stop_point']["coord"]["lon"], a_schedule['stop_point']["coord"]["lat"])
                        total_nb_tests += 1
                        if len(a_schedule['date_times']) == 0:
                            if a_schedule['additional_informations'] in ["no_departure_this_day", "partial_terminus", "terminus"]:
                                test_result["no schedules, but that is expected"] += 1
                                message = "no schedules today for stop {}, line {}, route {} ({}, {}, {})".format(a_schedule['stop_point']['name'], a_schedule['route']['line']['code'], a_schedule['route']['name'], a_schedule['stop_point']['id'], a_schedule['route']['line']['id'], a_schedule['route']['id'])
                                result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                                    a_schedule['stop_point']['id'], "stop_point", "realtime proxy mode", "no schedules today",
                                    message, "green", wkt]
                                detail_test_result.append(result)
                            else:
                                message = "no schedules for stop {}, line {}, route {} ({}, {}, {})".format(a_schedule['stop_point']['name'], a_schedule['route']['line']['code'], a_schedule['route']['name'], a_schedule['stop_point']['id'], a_schedule['route']['line']['id'], a_schedule['route']['id'])
                                result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                                    a_schedule['stop_point']['id'], "stop_point", "realtime proxy mode", "no schedules",
                                    message, "red", wkt]
                                detail_test_result.append(result)
                                test_result['no schedules at all'] += 1
                        else:
                            if a_schedule['date_times'][0]['data_freshness'] != "realtime":
                                test_result['theoretical schedules only'] += 1
                                message = "no realtime data for stop {}, line {}, route {} ({}, {}, {})".format(a_schedule['stop_point']['name'], a_schedule['route']['line']['code'], a_schedule['route']['name'], a_schedule['stop_point']['id'], a_schedule['route']['line']['id'], a_schedule['route']['id'])
                                result = [coverage, environnement, datetime.date.today().strftime('%Y%m%d'),
                                    a_schedule['stop_point']['id'], "stop_point", "realtime proxy mode", "theoretical schedules",
                                    message, "orange", wkt]
                                detail_test_result.append(result)
                            else:
                                test_result['OK'] += 1

    logger.info ("Résultat des tests :")
    logger.info (">> {} cas de tests".format(total_nb_tests))
    logger.info (">> {} ligne(s) sans temps réel configuré".format(test_result['ligne non configurée']))
    logger.info (">> {} cas de services terminés".format(test_result["pas horaires mais c'est normal"]))
    logger.info (">> {} cas où du théorique est renvoyé".format(test_result['horaires théoriques']))
    logger.info (">> {} cas où aucun horaire n'est renvoyé".format(test_result['pas horaires du tout']))
    logger.info (">> {} cas où ça marche !".format(test_result['OK']))
    logger.info (">> au moins {} cas non testés ".format(test_result['non testé']))

    utils.write_errors_to_file (environnement, coverage, "check_realtime_proxy", detail_test_result)
    utils.generate_file_summary()
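
# Example invocation -- a hypothetical sketch, not part of the original script:
# the environment name, coverage id and network id below are placeholders that
# must exist in ../params.json and in the Navitia instance being queried.
#
#   test_network_for_realtime_on_stop_schedule("prod", "fr-idf", "network:RATP")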