#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

import tornado, tornado.gen, tornado.web
from tornado.options import define, options
import datetime

import config
import do_check
from common import my_logger

logging = my_logger.Logger("schedules.check_fail.py", False, True, True)

setting = {
    "running": False,  # set to True while a check run is in progress, acts as a simple lock
    "db": None,
    "count_remove": 0,
    "count_update": 0,
}


@tornado.gen.coroutine
def do_check_fail(proxy_item, ip_data):
    if ip_data:
        setting["count_update"] += 1
        ip_data["data_source"] = proxy_item["data_source"]
        ip_data["create_datetime"] = ip_data["last_validate_datetime"]
        # move to available_pool
        yield [
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

import tornado, tornado.gen, tornado.web
from tornado.options import define, options
import datetime

import config
import do_check
from common import my_logger

logging = my_logger.Logger("schedules.check_available.py", False, True, True)

setting = {
    "running": False,  # set to True while a check run is in progress, acts as a simple lock
    "db": None,
    "count_remove": 0,
    "count_update": 0,
}


@tornado.gen.coroutine
def do_check_available(proxy_item, ip_data):
    if ip_data:
        setting["count_update"] += 1
        yield setting["db"][config.setting["available_pool"]["db_name"]].update({
            "_id": proxy_item["_id"],
        }, {"$set": ip_data})
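
# --- Hedged sketch, not part of the repository ---------------------------------
# Illustrates how a schedule such as check_available.py presumably hands
# do_check_available to DoCheck (schedules/do_check.py): DoCheck expects
# "callback_func", "timeout", "collection" and "page_size" in its arg dict, and
# the callback is invoked per proxy as callback_func(proxy_item, ip_data).
# The timeout and page_size values below are assumptions, and how the checker is
# started after construction is not shown in this excerpt.
def build_available_checker(db):
    setting["db"] = db
    return do_check.DoCheck({
        "callback_func": do_check_available,
        "timeout": 10,     # assumed per-request validation timeout, in seconds
        "collection": db[config.setting["available_pool"]["db_name"]],
        "page_size": 100,  # assumed number of proxies fetched per batch
    })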
from tornado import gen, ioloop, httpclient
from urllib import urlencode

from common import tool, tornado_timmer, tornado_pool
from common import my_logger
from proxy_crawler import (
    validate_www_3322_org as default_validate_site,
    validate_ayiis_me,
)

logging = my_logger.Logger("proxy_crawler.validate.py", False, True, True)

target_collection_name = "available_pool"


class DoValidate(object):

    def __init__(self, arg):
        super(DoValidate, self).__init__()
        self.setting = {
            "stop": False,
            "total": 0,
            "done": 0,
            "good": 0,
            "callback_func": arg["callback_func"],
            "timeout": arg["timeout"],
            "collection": arg["collection"],
from urlparse import urlparse

import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.web
import tornado.gen
import tornado.httpclient
import tornado.httputil
import traceback

import proxy_manager
from common import tool
from common import my_logger

logging = my_logger.Logger("routes.proxy.py", False, True, True)

__all__ = ["ProxyHandler"]


class ProxyHandler(tornado.web.RequestHandler):

    SUPPORTED_METHODS = ["GET", "POST", "CONNECT"]
    ERROR_STATUS_CODE = 305  # FAIL TO BUILD CONNECTION

    def initialize(self):
        self.proxy_item = None

    def compute_etag(self):
        return None  # disable tornado Etag

    @tornado.gen.coroutine
import datetime, traceback, os

import config
from common import (
    tool,
    tornado_pool,
    my_logger,
)
from proxy_crawler import (
    # validate_www_3322_org as default_validate_site,
    validate_ayiis_me as default_validate_site,
)

logging = my_logger.Logger("schedules.do_check.py", False, True, True)


class DoCheck(object):

    def __init__(self, arg):
        super(DoCheck, self).__init__()
        self.setting = {
            "stop": False,
            "total": 0,
            "done": 0,
            "callback_func": arg["callback_func"],
            "timeout": arg["timeout"],
            "collection": arg["collection"],
            "ObjectId": None,
            "page_size": arg["page_size"],
            "my_ip": tool.get_my_ip(),