Example 1
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

import tornado, tornado.gen, tornado.web
from tornado.options import define, options
import datetime

import config
import do_check

from common import my_logger
logging = my_logger.Logger("schedules.check_fail.py", False, True, True)

setting = {
    "running": False,  # when i was running, Lock me
    "db": None,
    "count_remove": 0,
    "count_update": 0,
}


@tornado.gen.coroutine
def do_check_fail(proxy_item, ip_data):

    if ip_data:
        setting["count_update"] += 1
        ip_data["data_source"] = proxy_item["data_source"]
        ip_data["create_datetime"] = ip_data["last_validate_datetime"]

        # move to available_pool
        yield [
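
The listing breaks off at the opening `yield [`. Below is a minimal sketch of how the "move to available_pool" step might continue, assuming a Motor-style async driver behind setting["db"] and a hypothetical "fail_pool" entry in config.setting that mirrors the "available_pool" key used in Example 2; none of this is confirmed by the source.

# Sketch only: reuses the tornado.gen and config imports from the snippet above.
# The "fail_pool" config key and the Motor-style insert()/remove() calls are assumptions.
@tornado.gen.coroutine
def move_to_available_pool(db, proxy_item, ip_data):
    # insert the revalidated record into the available pool and, in parallel,
    # drop it from the pool of failed proxies
    yield [
        db[config.setting["available_pool"]["db_name"]].insert(ip_data),
        db[config.setting["fail_pool"]["db_name"]].remove({"_id": proxy_item["_id"]}),
    ]
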
Example 2
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-

import tornado, tornado.gen, tornado.web
from tornado.options import define, options
import datetime

import config
import do_check

from common import my_logger

logging = my_logger.Logger("schedules.check_available.py", False, True, True)

setting = {
    "running": False,  # when i was running, Lock me
    "db": None,
    "count_remove": 0,
    "count_update": 0,
}


@tornado.gen.coroutine
def do_check_available(proxy_item, ip_data):

    if ip_data:
        setting["count_update"] += 1
        yield setting["db"][config.setting["available_pool"]["db_name"]].update(
            {"_id": proxy_item["_id"]},
            {"$set": ip_data},
        )
Example 3
from tornado import gen, ioloop, httpclient
from urllib import urlencode
from common import (
    tool,
    tornado_timmer,
    tornado_pool,
)

from proxy_crawler import (
    validate_www_3322_org as default_validate_site,
    validate_ayiis_me,
)

from common import my_logger
logging = my_logger.Logger("proxy_crawler.validate.py", False, True, True)

target_collection_name = "available_pool"


class DoValidate(object):
    def __init__(self, arg):
        super(DoValidate, self).__init__()
        self.setting = {
            "stop": False,
            "total": 0,
            "done": 0,
            "good": 0,
            "callback_func": arg["callback_func"],
            "timeout": arg["timeout"],
            "collection": arg["collection"],
Example 4
from urlparse import urlparse

import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.web
import tornado.gen
import tornado.httpclient
import tornado.httputil
import traceback

import proxy_manager
from common import tool

from common import my_logger
logging = my_logger.Logger("routes.proxy.py", False, True, True)

__all__ = ["ProxyHandler"]


class ProxyHandler(tornado.web.RequestHandler):
    SUPPORTED_METHODS = ["GET", "POST", "CONNECT"]
    ERROR_STATUS_CODE = 305  # returned when the upstream connection cannot be established

    def initialize(self):
        self.proxy_item = None

    def compute_etag(self):
        return None  # disable Tornado's ETag generation

    @tornado.gen.coroutine
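
The snippet ends at the @tornado.gen.coroutine decorator, presumably the start of the GET/POST entry point. A rough, hypothetical sketch of such a forwarding coroutine, using only Tornado's public client API (the proxy_manager lookup that the real handler performs is omitted):

import tornado.gen
import tornado.httpclient
import tornado.web


class ForwardingSketchHandler(tornado.web.RequestHandler):
    # hypothetical stand-in, not the project's ProxyHandler
    ERROR_STATUS_CODE = 305

    @tornado.gen.coroutine
    def get(self):
        client = tornado.httpclient.AsyncHTTPClient()
        try:
            # in a forward proxy, self.request.uri is the absolute target URL
            response = yield client.fetch(
                self.request.uri,
                headers=self.request.headers,
                follow_redirects=False,
            )
        except tornado.httpclient.HTTPError as err:
            if err.response is None:
                self.set_status(self.ERROR_STATUS_CODE)
                return
            response = err.response            # relay non-2xx responses as-is
        except Exception:
            self.set_status(self.ERROR_STATUS_CODE)
            return
        self.set_status(response.code, response.reason)
        for name, value in response.headers.get_all():
            if name.lower() not in ("content-length", "transfer-encoding", "connection"):
                self.set_header(name, value)
        if response.body:
            self.write(response.body)
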
Example 5
import datetime, traceback, os

import config

from common import (
    tool,
    tornado_pool,
    my_logger,
)

from proxy_crawler import (
    # validate_www_3322_org as default_validate_site,
    validate_ayiis_me as default_validate_site,
)

logging = my_logger.Logger("schedules.do_check.py", False, True, True)


class DoCheck(object):
    def __init__(self, arg):
        super(DoCheck, self).__init__()
        self.setting = {
            "stop": False,
            "total": 0,
            "done": 0,
            "callback_func": arg["callback_func"],
            "timeout": arg["timeout"],
            "collection": arg["collection"],
            "ObjectId": None,
            "page_size": arg["page_size"],
            "my_ip": tool.get_my_ip(),