Example #1
class WSClientTransport(WebSocketClient):
    APP_FACTORY = None

    def __init__(self, url):
        self._close_event = Event()
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        WebSocketClient.__init__(self, url)
        self._app = None
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            self._lock.acquire()
            try:
                _sendall(data)
            except:
                raise
            finally:
                self._lock.release()
        self.sock.sendall = sendall

    def connect(self):
        super(WSClientTransport, self).connect()
        self._app = self.APP_FACTORY(self)
        log.info("Connected to websocket server {0}".format(self.url))

    def closed(self, code, reason=None):
        app, self._app = self._app, None
        if app:
            app.on_close()
        self._close_event.set()

    def ponged(self, pong):
        pass

    def received_message(self, message):
        log.debug("Received message {0}".format(message))
        if self._app:
            self._app.on_received_packet(STRING(message))
        else:
            log.warning('Websocket client app already closed')

    def send_packet(self, data):
        log.debug("Sending message {0}".format(data))
        self.send(data)

    def force_shutdown(self):
        # called by the upper layer, and no callback will be possible when closed
        self._app = None
        self.close()
        self._close_event.set()
        log.info('Websocket client closed')

    def wait_close(self):
        self._close_event.wait()

    def app(self):
        return self._app
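The sendall patch in Example #1 (and repeated in several examples below) boils down to serializing writes to a shared socket with a gevent RLock. A minimal, self-contained sketch of that pattern, not taken from any of the projects listed here:

from gevent.lock import RLock

def make_locked_sendall(raw_sendall, lock=None):
    # Wrap a raw sendall callable so that greenlets sending concurrently
    # on the same socket cannot interleave their data on the wire.
    lock = lock or RLock()

    def locked_sendall(data):
        with lock:
            raw_sendall(data)

    return locked_sendall

# Hypothetical usage, mirroring the examples above:
#   sock.sendall = make_locked_sendall(sock.sendall)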
Example #2
File: ws.py Project: dulton/IVR
class WSServerTransport(WebSocket):
    APP_FACTORY = None

    def __init__(self, *args, **kwargs):
        super(WSServerTransport, self).__init__(*args, **kwargs)
        self._app = None

    def opened(self):
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            self._lock.acquire()
            try:
                _sendall(data)
            except:
                raise
            finally:
                self._lock.release()
        self.sock.sendall = sendall

        # create app
        if not self.environ.get('QUERY_STRING'):
            query = {}
        else:
            query = urlparse.parse_qs(self.environ['QUERY_STRING'])
        for key, value in query.iteritems():
            query[key] = value[0]
        self._app = self.APP_FACTORY(self, query)

    def closed(self, code, reason=None):
        app, self._app = self._app, None
        if app:
            app.on_close()

    def ponged(self, pong):
        pass

    def received_message(self, message):
        log.debug("Received message {0}".format(message))
        self._app.on_received_packet(STRING(message))

    def send_packet(self, data):
        log.debug("Sending message {0}".format(data))
        self.send(data)

    def force_shutdown(self):
        # called by the upper layer, and no callback will be possible when closed
        log.info("shutdown")
        self._app = None
        self.close()
Example #3
class ThreadSafeFSM(InstrumentFSM):
    def __init__(self, states, events, enter_event, exit_event):
        self._lock = RLock()
        super(ThreadSafeFSM, self).__init__(states, events, enter_event, exit_event)
    def on_event(self, event, *args, **kwargs):
        with self._lock:
            return super(ThreadSafeFSM, self).on_event(event, *args, **kwargs)
    def on_event_if_free(self, event, *args, **kwargs):
        if not self._lock.acquire(blocking=False):
            raise FSMLockedError
        try:
            retval = super(ThreadSafeFSM, self).on_event(event, *args, **kwargs)
        finally:
            self._lock.release()
        return retval
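A minimal sketch of the non-blocking acquire pattern used by on_event_if_free() above, assuming (as the examples here do) that gevent's RLock.acquire() supports the blocking keyword; the Busy exception is a stand-in for FSMLockedError and not part of the original project:

from gevent.lock import RLock

class Busy(Exception):
    # Stand-in for FSMLockedError used above.
    pass

_lock = RLock()

def run_if_free(func, *args, **kwargs):
    # Run func only if the lock can be taken without waiting; otherwise
    # fail fast instead of queuing behind the current holder.
    if not _lock.acquire(blocking=False):
        raise Busy()
    try:
        return func(*args, **kwargs)
    finally:
        _lock.release()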
Example #4
    def opened(self):
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            self._lock.acquire()
            try:
                _sendall(data)
            except:
                raise
            finally:
                self._lock.release()
        self.sock.sendall = sendall

        # create app
        try:
            if not self.environ.get('QUERY_STRING'):
                query = {}
            else:
                query = urlparse.parse_qs(self.environ['QUERY_STRING'])
            for key, value in query.iteritems():
                query[key] = value[0]
            self._app = self.APP_FACTORY(self, query)
        except Exception:
            log.exception('Failed to create websocket app')
            raise
Example #5
 def __init__(self, config):
     super(SudsSOAPWrapper, self).__init__(config)
     self.update_lock = RLock()
     self.config = config
     self.config_no_sensitive = deepcopy(self.config)
     self.config_no_sensitive['password'] = '******'
     self.client = _SudsClientQueue(Queue(self.config['pool_size']),
                                    self.config['name'])
Example #6
    def _check_channel_lock(self, partner: typing.Address):
        if partner not in self.channel_operations_lock:
            self.channel_operations_lock[partner] = RLock()

        if not self.channel_operations_lock[partner].acquire(blocking=False):
            raise ChannelBusyError(
                f'Channel between {self.node_address} and {partner} is '
                f'busy with another ongoing operation.', )
Example #7
    def __init__(self, amqp_url='amqp:///', heartbeat=30, debug=False):
        super(BaseConnection, self).__init__()

        self.channel_id = 0
        self.channels = {}
        self.connect_lock = RLock()
        self.channels_lock = RLock()
        self.queue = None
        self.state = STATE_DISCONNECTED
        self.disconnect_event = Event()
        self.debug = debug

        # Negotiate for heartbeats
        self.requested_heartbeat = heartbeat

        (self.username, self.password, self.vhost, self.host, self.port) = \
            parse_amqp_url(str(amqp_url))
Example #8
    def __init__(self):
        self.lock = RLock()
        self.topic_sub_key_to_msg_id = {} # Topic ID -> Sub key -> Msg ID
        self.topic_msg_id_to_msg = {}     # Topic ID -> Msg ID  -> Message data
        self.msg_id_to_expiration = {}    # Msg ID   -> (Topic ID, sub_keys, expiration time in milliseconds)

        # Start in background a cleanup task that removes all expired messages
        spawn_greenlet(self.run_cleanup_task)
Example #9
    def __init__(self, config):
        self.config = config
        self.url = '{protocol}://{user}:******@{host}:{port}/{database}'.format(**self.config)
        self.client = ConnectionQueue(
            self.config.pool_size, self.config.queue_build_cap, self.config.name, 'Odoo', self.url, self.add_client)

        self.update_lock = RLock()
        self.logger = getLogger(self.__class__.__name__)
Example #10
 def __init__(self, services=None, service_store_config=None, odb=None):
     self.services = services
     self.service_store_config = service_store_config
     self.odb = odb
     self.id_to_impl_name = {}
     self.impl_name_to_id = {}
     self.name_to_impl_name = {}
     self.update_lock = RLock()
Example #11
 def __init__(self, proxy, runner):
     self.proxy = proxy
     self.next = None
     self.prev = None
     self.task_id = JobService.instance().generateUniqueID()
     self.runner = runner
     self.profile = Task.Profile()
     self.locker = RLock()
Example #12
 def __init__(self, canvas, addr):
     self.canvas = canvas
     self.socket = None
     self.addr = addr
     self.connect_ts = time.time()
     # And this is used to limit clients to X messages per tick
     # We start at 0 (instead of x) to add a reconnect-penalty.
     self.lock = RLock()
Example #13
    def publish(self, topic, data):
        lock = self.lock
        if not lock:
            lock = RLock()
            self.lock = lock

        with lock:
            return RedisInterconnect.publish(self, topic, data)
Example #14
    def __init__(self, manager):
        super(RequestCollection, self).__init__()

        self._by_request_id = {}
        self._by_jid = {}
        self._lock = RLock()

        self._manager = manager
Example #15
 def get_lock(self, lock_name):
     from gevent.lock import RLock
     lock_name = _normalize_path(lock_name)
     if lock_name not in self._store:
         lock = RLock()
         self.write(lock_name, lock)
     else:
         lock = self._store[lock_name]
     return lock
Example #16
    def __init__(self, sync_objects, eventer):
        super(RequestCollection, self).__init__()

        self._sync_objects = sync_objects
        self._eventer = eventer

        self._by_request_id = {}
        self._by_jid = {}
        self._lock = RLock()
Example #17
    def add_sub_key_no_lock(self, sub_key):
        """ Adds metadata about a given sub_key - must be called with self.lock held.
        """
        # Already seen it - can be ignored
        if sub_key in self.sub_keys:
            return

        self.sub_keys.add(sub_key)
        self.batch_size[sub_key] = 1

        #
        # A dictionary that maps when GD messages were last time fetched from the SQL database for each sub_key.
        # Since fetching means we are issuing a single query for multiple sub_keys at a time, we need to fetch only those
        # messages that are younger than the oldest value across all of the sub_keys whose messages will be fetched.
        #
        # Let's say we have three sub_keys: a, b, c
        #
        # time 0001: pub to a, b, c
        # time 0001: store last_gd_run = 0001 for each of a, b, c
        # time 0002: pub to a, b
        # time 0002: store last_gd_run = 0002 for a, b
        # time 0003: pub to b, c
        # time 0003: store last_gd_run = 0003 for b, c
        # time 0004: pub to c
        # time 0004: store last_gd_run = 0004 for c
        #
        # We now have: {a:0002, b:0003, c:0004}
        #
        # Let's say we now receive:
        #
        # time 0005: pub to a, b, c
        #
        # Because we want to have a single SQL query for all of a, b, c instead of querying the database for each of sub_key,
        # we need to look up values stored in this dictionary for each of the sub_key and use the smallest one - in this case
        # it would be 0002 for sub_key a. Granted, we know that there won't be any messages for b in the timespan of 0002-0003
        # or for c in the timespan of 0003-0004, so reaching that far back in time for these other keys is more than necessary,
        # but this is all fine because the most important part is that we can still use a single SQL query.
        #
        # Similarly, had it been a pub to b, c in time 0005 then we would be using min of b and c which is 0003.
        #
        # The reason why this is fine is that when we query the database, not only do we use this last_gd_run but we also give it
        # a delivery status to return messages by (initialized only) and, on top of that, we provide it a list of message IDs
        # that are currently being delivered by tasks - in other words, we never receive from the database duplicates of messages
        # that have already been delivered or are about to be.
        #
        self.last_gd_run = {}

        delivery_list = SortedList()
        delivery_lock = RLock()

        self.delivery_lists[sub_key] = delivery_list
        self.delivery_tasks[sub_key] = DeliveryTask(
            self.pubsub, sub_key, delivery_lock, delivery_list,
            self.deliver_pubsub_msg, self.confirm_pubsub_msg_delivered,
            self.pubsub.get_subscription_by_sub_key(sub_key).config)

        self.sub_key_locks[sub_key] = delivery_lock
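To make the last_gd_run comment above concrete, here is a small illustrative snippet (values and sub_key names are hypothetical) showing how the oldest per-sub_key fetch time would be chosen so that a single SQL query can cover all sub_keys involved in a publication:

# Hypothetical values following the timeline in the comment above.
last_gd_run = {'a': 2, 'b': 3, 'c': 4}  # sub_key -> time of the last GD fetch

def oldest_gd_run(sub_keys):
    # A single query must reach back to the oldest last_gd_run among
    # the sub_keys being fetched, so take the minimum.
    return min(last_gd_run[sk] for sk in sub_keys if sk in last_gd_run)

print(oldest_gd_run(['a', 'b', 'c']))  # -> 2
print(oldest_gd_run(['b', 'c']))       # -> 3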
Example #18
 def __init__(self, params):
     self.total = 0
     self.processed = 0
     from gevent.lock import RLock
     self.lock = RLock()
     for item in params:
         for test_stage in item['test_stages']:
             for test_name in item['test_stages'][test_stage]:
                 self.total += 1
Example #19
    def __init__(self, session_id, data):
        self.data = data
        self.session_id = session_id
        self._unacked = []
        self._pubsub = redis_connection.pubsub()

        self._read_lock = RLock()

        self.start_listen()
Example #20
 def __init__(self, config, server=None):
     # type: (Bunch, ParallelServer)
     self.config = config
     self.config.username_pretty = self.config.username or '(None)'
     self.server = server
     self._client = None
     self.delete_requested = False
     self.is_connected = False
     self.update_lock = RLock()
Example #21
 def __init__(self, filename, handler_cls, *handler_args, **handler_kwargs):
     handler_level = handler_kwargs.get("level", logging.NOTSET)
     logging.Handler.__init__(self, level=handler_level)
     self.filename = filename
     self.handlers = {}
     self.handlers_lock = RLock()
     self.handler_cls = instance_or_import(handler_cls)
     self.handler_args = handler_args
     self.handler_kwargs = handler_kwargs
Example #22
 def __init__(self):
     self.registry = Registry(self._delete_callback)
     self.update_lock = RLock()
     self.permissions = {}
     self.http_permissions = {}
     self.role_id_to_name = {}
     self.role_name_to_id = {}
     self.client_def_to_role_id = {}
     self.role_id_to_client_def = {}
Example #23
    def __init__(self, mesos_master_path, zk_servers, **kwargs):
        self._members = {}
        self._current_leader = None
        self._member_lock = RLock()
        self._zk_server_set = None
        self._next_on_join = self._on_join(mesos_master_path)
        self._next_on_leave = self._on_leave(mesos_master_path)

        super(MesosMasterProxySource, self).__init__(mesos_master_path,
                                                     zk_servers, **kwargs)
Example #24
class ThreadSafeFSM(InstrumentFSM):
    def __init__(self, states, events, enter_event, exit_event):
        self._lock = RLock()
        super(ThreadSafeFSM, self).__init__(states, events, enter_event,
                                            exit_event)

    def on_event(self, event, *args, **kwargs):
        with self._lock:
            return super(ThreadSafeFSM, self).on_event(event, *args, **kwargs)

    def on_event_if_free(self, event, *args, **kwargs):
        if not self._lock.acquire(blocking=False):
            raise FSMLockedError
        try:
            retval = super(ThreadSafeFSM,
                           self).on_event(event, *args, **kwargs)
        finally:
            self._lock.release()
        return retval
Example #25
 def event_consumer_loop():
     # xmlrpclib is not reentrant. We might have several greenlets accessing
     # supervisor at the same time so we serialize event treatment here
     lock = RLock()
     for event in channel:
         try:
             with lock:
                 supervisor.publish_event(event)
         except:
             logging.exception('Error processing %s', event)
Example #26
    def __init__(self, pd_core):
        self._pd_core = pd_core
        self.container = self._pd_core.container
        self.rr = self.container.resource_registry

        self._lock = RLock()  # Master lock protecting data structures
        self._containers = {}  # Registry of containers
        self._processes = {}  # Registry of processes

        self.preconditions_true = gevent.event.Event()
Example #27
    def __init__(self, logger, pending_online_users, make_func, send_func):
        """Initialize Queue Handler

            :param logger: logger object
            :type logger: Logger
            :param pending_online_users: online users queue
            :type pending_online_users: gevent.queue
            :param make_func: the function to make bundle
            :type make_func: lambda,instancemethod,function
            :param send_func: the function to send bundle
            :type send_func: lambda,instancemethod,function
        """
        self.alive = True
        self.last_idx = None
        self.logger = logger
        self.pending_online_users = pending_online_users
        self._pause_lock = RLock()
        self._make_func = make_func
        self._send_func = send_func
Example #28
    def __init__(self, pubsub):
        self.pubsub = pubsub        # type: PubSub
        self.sub_key_to_msg_id = {} # Sub key  -> Msg ID set --- What messages are available for a given subscriber
        self.msg_id_to_sub_key = {} # Msg ID   -> Sub key set  - What subscribers are interested in a given message
        self.msg_id_to_msg = {}     # Msg ID   -> Message data - What is the actual contents of each message
        self.topic_msg_id = {}      # Topic ID -> Msg ID set --- What messages are available for each topic (no matter sub_key)
        self.lock = RLock()

        # Start in background a cleanup task that deletes all expired and removed messages
        spawn_greenlet(self.run_cleanup_task)
Example #29
 def __init__(self, services=None, service_store_config=None, odb=None, server=None):
     self.services = services
     self.service_store_config = service_store_config
     self.odb = odb
     self.server = server
     self.id_to_impl_name = {}
     self.impl_name_to_id = {}
     self.name_to_impl_name = {}
     self.update_lock = RLock()
     self.patterns_matcher = Matcher()
Example #30
File: ws.py Project: dulton/IVR
    def __init__(self, url):
        self._close_event = Event()
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        WebSocketClient.__init__(self, url)
        self._app = None
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            self._lock.acquire()
            try:
                _sendall(data)
            except:
                raise
            finally:
                self._lock.release()

        self.sock.sendall = sendall
Example #31
    def __init__(self, config):
        self.config = config

        self.client = ConnectionQueue(self.config.pool_size,
                                      self.config.queue_build_cap,
                                      self.config.name, 'OpenStack Swift',
                                      self.config.auth_url, self.add_client)

        self.update_lock = RLock()
        self.logger = getLogger(self.__class__.__name__)
Example #32
    def __init__(self, config, on_message_callback):
        self.config = config
        self.on_message_callback = on_message_callback
        self.address = config.address
        self.poll_interval = config.poll_interval
        self.pool_strategy = config.pool_strategy
        self.service_source = config.service_source
        self.keep_running = True
        self.tcp_port = int(self.address.split(':')[-1])

        # A hundred years in seconds, used when creating internal workers
        self.y100 = 60 * 60 * 24 * 365 * 100

        # So they do not have to be looked up on each request or event
        self.has_info = logger.isEnabledFor(logging.INFO)
        self.has_debug = logger.isEnabledFor(logging.DEBUG)
        self.has_pool_strategy_simple = self.pool_strategy == ZMQ.POOL_STRATEGY_NAME.SINGLE
        self.has_service_source_zato = self.service_source == ZMQ.SERVICE_SOURCE_NAME.ZATO
        self.zato_service_name = config.service_name
        self.zato_channel = CHANNEL.ZMQ

        if self.has_pool_strategy_simple:
            self.workers_pool_initial = 1
            self.workers_pool_mult = 0
            self.workers_pool_max = 1
        else:
            self.workers_pool_initial = config.workers_pool_initial
            self.workers_pool_mult = config.workers_pool_mult
            self.workers_pool_max = config.workers_pool_max

        # Maps service names to workers registered to handle requests to that service
        self.services = {}

        # Details about each worker, mapped by worker_id:Worker object
        self.workers = {}

        # Held upon most operations on sockets
        self.lock = RLock()

        # How often, in seconds, to send a heartbeat to workers
        self.heartbeat = config.heartbeat

        self.ctx = zmq.Context()
        self.socket = self.ctx.socket(zmq.ROUTER)
        self.socket.linger = config.linger
        self.poller = zmq.Poller()
        self.poller.register(self.socket, zmq.POLLIN)

        # Maps event IDs to methods that handle a given one
        self.handle_event_map = {
            const.v01.ready: self.on_event_ready,
            const.v01.reply_from_worker: self.on_event_reply,
            const.v01.heartbeat: self.on_event_heartbeat,
            const.v01.disconnect: self.on_event_disconnect,
        }
Example #33
 def __init__(self):
     self.client = None
     self.server = None
     self.datastore = None
     self.hdf = None
     self.hdf_group = None
     self.config = None
     self.event_queue = collections.deque()
     self.event_lock = RLock()
     self.logger = logging.getLogger('statd')
     self.data_sources = {}
Example #34
    def __init__(self, config, conn_type):
        self.conn_type = conn_type
        self.config = config

        self.client = ConnectionQueue(self.config.pool_size,
                                      self.config.queue_build_cap,
                                      self.config.name, self.conn_type,
                                      self.config.auth_url, self.add_client)

        self.update_lock = RLock()
        self.logger = logging.getLogger(self.__class__.__name__)
Example #35
 def __init__(self, logger):
     Greenlet.__init__(self)
     self.logger = logger
     self._users_lock = RLock()
     self._msgs = {}
     self._users = {}
     self.send_queue = Queue()
     self.pending_online_users = Queue()
     self.bootstrap()
     self._dying = False
     self.start()
Example #36
    def __init__(self, server):
        self.server = server
        self.lock = RLock()
        self.default = _NotConfiguredAPI()
        self.caches = {
            CACHE.TYPE.BUILTIN: {},
            CACHE.TYPE.MEMCACHED: {},
        }

        self.builtin = self.caches[CACHE.TYPE.BUILTIN]
        self.memcached = self.caches[CACHE.TYPE.MEMCACHED]
Example #37
    def __init__(self, args):
        # TODO: make separate queues for fast logging
        self.args = args
        self.lock = RLock()

        # Colorama init
        init()
        # Initialise logging
        self._init_logger()
        # Initialise output
        self._init_output()
        # Stats
        self.urls_scanned = 0
Example #38
    def __init__(self):
        httplib.HTTPConnection.debuglevel = self.http_debuglevel
        self.network = NetworkManager(crawler=self)

        self.pool = Pool()
        self.lock = RLock()
        self.bloom_filters = {}
        self.name = self.__class__.__name__
        self._status = {
            'process_count': 0,
            'is_stop': True,
            'run_seconds': 0,
            'crawler_name': self.name,
        }
Example #39
 def __init__(self, dispatcher):
     self.dispatcher = dispatcher
     self.task_list = []
     self.task_queue = Queue()
     self.resource_graph = dispatcher.resource_graph
     self.threads = []
     self.executors = []
     self.logger = logging.getLogger('Balancer')
     self.dispatcher.require_collection('tasks', 'serial', type='log')
     self.create_initial_queues()
     self.schedule_lock = RLock()
     self.distribution_lock = RLock()
     self.debugger = None
     self.debugged_tasks = None
     self.dispatcher.register_event_type('task.changed')
Example #40
    def __init__(self, url):
        self._close_event = Event()
        # patch socket.sendall to protect it with lock,
        # in order to prevent sending data from multiple greenlets concurrently
        WebSocketClient.__init__(self, url)
        self._app = None
        self._lock = RLock()
        _sendall = self.sock.sendall

        def sendall(data):
            self._lock.acquire()
            try:
                _sendall(data)
            except:
                raise
            finally:
                self._lock.release()
        self.sock.sendall = sendall
Example #41
 def __init__(self):
     self.pending_calls = {}
     self.pending_events = []
     self.event_handlers = {}
     self.rpc = None
     self.event_callback = None
     self.error_callback = None
     self.rpc_callback = None
     self.receive_thread = None
     self.token = None
     self.event_distribution_lock = RLock()
     self.event_emission_lock = RLock()
     self.default_timeout = 20
     self.scheme = None
     self.transport = None
     self.parsed_url = None
     self.last_event_burst = None
     self.use_bursts = False
     self.event_cv = Event()
     self.event_thread = None
Example #42
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger("Balancer")
        self.dispatcher.require_collection("tasks", "serial", type="log")
        self.create_initial_queues()
        self.start_executors()
        self.schedule_lock = RLock()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type("task.changed")

        # Let's try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query("tasks", ("state", "in", ["EXECUTING", "WAITING", "CREATED"])):
            self.logger.info(
                "Stale task ID: {0}, name: {1} being set to FAILED".format(stale_task["id"], stale_task["name"])
            )

            stale_task.update(
                {
                    "state": "FAILED",
                    "error": {
                        "type": "TaskException",
                        "message": "dispatcher process died",
                        "code": errno.EINTR,
                        "stacktrace": "",
                        "extra": None,
                    },
                }
            )

            dispatcher.datastore.update("tasks", stale_task["id"], stale_task)
Example #43
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger('Balancer')
        self.dispatcher.require_collection('tasks', 'serial', type='log')
        self.create_initial_queues()
        self.start_executors()
        self.schedule_lock = RLock()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type('task.changed')

        # Let's try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query('tasks', ('state', 'in', ['EXECUTING', 'WAITING', 'CREATED'])):
            self.logger.info('Stale task ID: {0}, name: {1} being set to FAILED'.format(
                stale_task['id'],
                stale_task['name']
            ))

            stale_task.update({
                'state': 'FAILED',
                'error': {
                    'type': 'TaskException',
                    'message': 'dispatcher process died',
                    'code': errno.EINTR,
                    'stacktrace': '',
                    'extra': None
                }
            })

            dispatcher.datastore.update('tasks', stale_task['id'], stale_task)
Example #44
class Task:
    """
	Task描述一次服务数据请求处理
	Task任务链式钩挂
	"""

    class Profile:
        def __init__(self):
            self.start_time = 0
            self.end_time = 0
            self.last_watch_time = 0  # time of the most recent watch check
            self.status = JobStatusType.STOPPED
            self.result = None  # task execution result

    def __init__(self, proxy, runner):
        self.proxy = proxy
        self.next = None
        self.prev = None
        self.task_id = JobService.instance().generateUniqueID()
        self.runner = runner
        self.profile = Task.Profile()
        self.locker = RLock()

    def chainNext(self, task):
        self.next = task
        self.next.prev = self
        return self

    def getUniqueID(self):
        return self.task_id

    @property
    def ID(self):
        return self.getUniqueID()

    def execute(self, job):
        self.locker.acquire()
        try:
            task_id = self.getUniqueID()
            result = self.proxy.createTask(task_id, job)
            if result.status == CallReturnStatusValueType.SUCC:
                self.profile.start_time = int(time.time())
                self.profile.status = JobStatusType.RUNNING
                JobService.instance().onJobTaskStarted(self)
            return result
        finally:
            self.locker.release()

    def onFinished(self, task_result):
        self.locker.acquire()
        try:
            self.profile.end_time = int(time.time())
            self.profile.status = JobStatusType.FINISHED
            self.profile.result = task_result
            self.runner.getProfile().result = task_result

            self.runner.onTaskFinished(self)
        finally:
            self.locker.release()

    def onError(self, task_result={}):
        self.locker.acquire()
        try:
            self.profile.end_time = int(time.time())
            self.profile.status = JobStatusType.FAILED

            self.runner.getProfile().result = task_result
            self.runner.onTaskError(self)
        finally:
            self.locker.release()

    def onWatchTime(self):
        return  # NOTE: this early return disables the watch logic below
        try:
            result = self.proxy.watchTask(self.getUniqueID())
            self.profile.last_watch_time = int(time.time())
        except:
            traceback.print_exc()
Example #45
class Balancer(object):
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger("Balancer")
        self.dispatcher.require_collection("tasks", "serial", type="log")
        self.create_initial_queues()
        self.start_executors()
        self.schedule_lock = RLock()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type("task.changed")

        # Let's try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query("tasks", ("state", "in", ["EXECUTING", "WAITING", "CREATED"])):
            self.logger.info(
                "Stale task ID: {0}, name: {1} being set to FAILED".format(stale_task["id"], stale_task["name"])
            )

            stale_task.update(
                {
                    "state": "FAILED",
                    "error": {
                        "type": "TaskException",
                        "message": "dispatcher process died",
                        "code": errno.EINTR,
                        "stacktrace": "",
                        "extra": None,
                    },
                }
            )

            dispatcher.datastore.update("tasks", stale_task["id"], stale_task)

    def create_initial_queues(self):
        self.resource_graph.add_resource(Resource("system"))

    def start_executors(self):
        for i in range(0, self.dispatcher.configstore.get("middleware.executors_count")):
            self.logger.info("Starting task executor #{0}...".format(i))
            self.executors.append(TaskExecutor(self, i))

    def start(self):
        self.threads.append(gevent.spawn(self.distribution_thread))
        self.logger.info("Started")

    def schema_to_list(self, schema):
        return {
            "type": "array",
            "items": schema,
            "minItems": sum([1 for x in schema if "mandatory" in x and x["mandatory"]]),
            "maxItems": len(schema),
        }

    def verify_schema(self, clazz, args, strict=False):
        if not hasattr(clazz, "params_schema"):
            return []

        schema = self.schema_to_list(clazz.params_schema)
        val = validator.create_validator(schema, resolver=self.dispatcher.rpc.get_schema_resolver(schema))
        if strict:
            val.fail_read_only = True
        else:
            val.remove_read_only = True

        return list(val.iter_errors(args))

    def submit(self, name, args, sender, env=None):
        if name not in self.dispatcher.tasks:
            self.logger.warning("Cannot submit task: unknown task type %s", name)
            raise RpcException(errno.EINVAL, "Unknown task type {0}".format(name))

        task = Task(self.dispatcher, name)
        task.user = sender.user.name
        task.session_id = sender.session_id
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = copy.deepcopy(args)
        task.strict_verify = "strict_validation" in sender.enabled_features

        if env:
            if not isinstance(env, dict):
                raise ValueError("env must be a dict")

            task.environment = copy.deepcopy(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        if "RUN_AS_USER" in task.environment:
            task.user = task.environment["RUN_AS_USER"]

        task.environment["SENDER_ADDRESS"] = sender.client_address
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.set_state(TaskState.CREATED)
        self.task_queue.put(task)
        self.logger.info("Task %d submitted (type: %s, class: %s)", task.id, name, task.clazz)
        return task.id

    def submit_with_upload(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata["schema"]

        if schema is None:
            raise RpcException(errno.ENOENT, "Task {0} has no schema associated with it".format(task_name))
        upload_token_list = []
        for idx, arg in enumerate(schema):
            if arg.get("type") == "fd":
                rfd, wfd = os.pipe()
                token = self.dispatcher.token_store.issue_token(
                    FileToken(
                        user=sender.user,
                        lifetime=60,
                        direction="upload",
                        file=FileObjectPosix(wfd, "wb", close=True),
                        name=str(uuid.uuid4()),
                        size=None,
                    )
                )
                upload_token_list.append(token)
                args[idx] = FileDescriptor(rfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, upload_token_list

    def submit_with_download(self, task_name, args, sender, env=None):
        task_metadata = self.dispatcher.tasks[task_name]._get_metadata()
        schema = task_metadata["schema"]
        url_list = []

        if schema is None:
            raise RpcException(errno.ENOENT, "Task {0} has no schema associated with it".format(task_name))

        for idx, arg in enumerate(schema):
            if arg.get("type") == "fd":
                rfd, wfd = os.pipe()
                url_list.append(
                    "/dispatcher/filedownload?token={0}".format(
                        self.dispatcher.token_store.issue_token(
                            FileToken(
                                user=sender.user,
                                lifetime=60,
                                direction="download",
                                file=FileObjectPosix(rfd, "rb", close=True),
                                name=args[idx],
                            )
                        )
                    )
                )
                args[idx] = FileDescriptor(wfd)
        task_id = self.submit(task_name, args, sender, env)
        return task_id, url_list

    def verify_subtask(self, parent, name, args):
        clazz = self.dispatcher.tasks[name]
        instance = clazz(self.dispatcher, self.dispatcher.datastore)
        return instance.verify(*args)

    def run_subtask(self, parent, name, args, env=None):
        args = list(args)
        task = Task(self.dispatcher, name)
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.hooks = self.dispatcher.task_hooks.get(name, {})
        task.args = args
        task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
        task.instance.verify(*task.args)
        task.description = task.instance.describe(*task.args)
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.parent = parent
        task.environment = {}

        if parent:
            task.environment = copy.deepcopy(parent.environment)
            task.environment["parent"] = parent.id
            task.user = parent.user

        if env:
            if not isinstance(env, dict):
                raise ValueError("env must be a dict")

            task.environment.update(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.set_state(TaskState.CREATED)
        self.task_list.append(task)

        task.start()
        return task

    def join_subtasks(self, *tasks):
        for i in tasks:
            i.join()

    def abort(self, id, error=None):
        task = self.get_task(id)
        if not task:
            self.logger.warning("Cannot abort task: unknown task id %d", id)
            return

        success = False
        if task.started_at is None:
            success = True
        else:
            try:
                task.executor.abort()
            except:
                pass
        if success:
            task.ended.set()
            if error:
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(error))
                self.logger.debug("Task ID: %d, name: %s aborted with error", task.id, task.name)
            else:
                task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
                self.logger.debug("Task ID: %d, name: %s aborted by user", task.id, task.name)

    def task_exited(self, task):
        self.resource_graph.release(*task.resources)
        self.schedule_tasks(True)

    def schedule_tasks(self, exit=False):
        """
        This function is called when:
        1) any new task is submitted to any of the queues
        2) any task exits
        """
        with self.schedule_lock:
            started = 0
            executing_tasks = [t for t in self.task_list if t.state == TaskState.EXECUTING]
            waiting_tasks = [t for t in self.task_list if t.state == TaskState.WAITING]

            for task in waiting_tasks:
                if not self.resource_graph.can_acquire(*task.resources):
                    continue

                self.resource_graph.acquire(*task.resources)
                self.threads.append(task.start())
                started += 1

            if not started and not executing_tasks and (exit or len(waiting_tasks) == 1):
                for task in waiting_tasks:
                    # Check whether the task waits on nonexistent resources. If it does,
                    # abort it, because there is no chance anymore that the missing resources will appear.
                    if any(self.resource_graph.get_resource(res) is None for res in task.resources):
                        self.logger.warning("Aborting task {0}: deadlock".format(task.id))
                        self.abort(task.id, VerifyException(errno.EBUSY, "Resource deadlock avoided"))

    def distribution_thread(self):
        while True:
            self.task_queue.peek()
            self.distribution_lock.acquire()
            task = self.task_queue.get()

            try:
                self.logger.debug("Picked up task %d: %s with args %s", task.id, task.name, task.args)

                errors = self.verify_schema(self.dispatcher.tasks[task.name], task.args, task.strict_verify)
                if len(errors) > 0:
                    errors = list(validator.serialize_errors(errors))
                    self.logger.warning(
                        "Cannot submit task {0}: schema verification failed with errors {1}".format(task.name, errors)
                    )
                    raise ValidationException(extra=errors)

                task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
                task.resources = task.instance.verify(*task.args)
                task.description = task.instance.describe(*task.args)

                if type(task.resources) is not list:
                    raise ValueError("verify() returned something else than resource list")

            except Exception as err:
                self.logger.warning("Cannot verify task %d: %s", task.id, err)
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(err))
                task.ended.set()
                self.distribution_lock.release()

                if not isinstance(err, VerifyException):
                    self.dispatcher.report_error("Task {0} verify() method raised invalid exception".format(err), err)

                continue

            task.set_state(TaskState.WAITING)
            self.task_list.append(task)
            self.distribution_lock.release()
            self.schedule_tasks()
            if task.resources:
                self.logger.debug("Task %d assigned to resources %s", task.id, ",".join(task.resources))

    def assign_executor(self, task):
        for i in self.executors:
            with i.cv:
                if i.state == WorkerState.IDLE:
                    self.logger.info("Task %d assigned to executor #%d", task.id, i.index)
                    task.executor = i
                    i.state = WorkerState.ASSIGNED
                    return

        # Out of executors! Need to spawn new one
        executor = TaskExecutor(self, len(self.executors))
        self.executors.append(executor)
        with executor.cv:
            executor.cv.wait_for(lambda: executor.state == WorkerState.IDLE)
            executor.state = WorkerState.ASSIGNED
            task.executor = executor
            self.logger.info("Task %d assigned to executor #%d", task.id, executor.index)

    def dispose_executors(self):
        for i in self.executors:
            i.die()

    def get_active_tasks(self):
        return [x for x in self.task_list if x.state in (TaskState.CREATED, TaskState.WAITING, TaskState.EXECUTING)]

    def get_tasks(self, type=None):
        if type is None:
            return self.task_list

        return [x for x in self.task_list if x.state == type]

    def get_task(self, id):
        self.distribution_lock.acquire()
        t = first_or_default(lambda x: x.id == id, self.task_list)
        if not t:
            t = first_or_default(lambda x: x.id == id, self.task_queue.queue)

        self.distribution_lock.release()
        return t

    def get_executor_by_key(self, key):
        return first_or_default(lambda t: t.key == key, self.executors)

    def get_executor_by_sender(self, sender):
        return first_or_default(lambda t: t.conn == sender, self.executors)
Example #46
 def __init__(self, states, events, enter_event, exit_event):
     self._lock = RLock()
     super(ThreadSafeFSM, self).__init__(states, events, enter_event, exit_event)
Example #47
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None):
        self.lock()

        if not resource:
            self.unlock()
            raise ResourceError('Invalid resource')

        if self.get_resource(resource.name):
            self.unlock()
            raise ResourceError('Resource {0} already exists'.format(resource.name))

        self.resources.add_node(resource)
        if not parents:
            parents = ['root']

        for p in parents:
            node = self.get_resource(p)
            if not node:
                self.unlock()
                raise ResourceError('Invalid parent resource {0}'.format(p))

            self.resources.add_edge(node, resource)

        self.unlock()

    def remove_resource(self, name):
        self.lock()
        resource = self.get_resource(name)

        if not resource:
            self.unlock()
            return

        for i in nx.descendants(self.resources, resource):
            self.resources.remove_node(i)

        self.resources.remove_node(resource)
        self.unlock()

    def update_resource(self, name, new_parents):
        self.lock()
        resource = self.get_resource(name)

        if not resource:
            self.unlock()
            return

        for i in nx.descendants(self.resources, resource):
            self.resources.remove_node(i)

        for p in new_parents:
            node = self.get_resource(p)
            if not node:
                self.unlock()
                raise ResourceError('Invalid parent resource {0}'.format(p))

            self.resources.add_edge(node, resource)

        self.unlock()

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        self.lock()
        self.logger.debug('Acquiring following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            if not res:
                raise ResourceError('Resource {0} not found'.format(name))

            for i in nx.descendants(self.resources, res):
                if i.busy:
                    self.unlock()
                    raise ResourceError('Cannot acquire, some of dependent resources are busy')

            res.busy = True

        self.unlock()

    def can_acquire(self, *names):
        self.lock()
        self.logger.debug('Trying to acquire following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            if not res:
                self.unlock()
                return False

            if res.busy:
                self.unlock()
                return False

            for i in nx.descendants(self.resources, res):
                if i.busy:
                    self.unlock()
                    return False

        self.unlock()
        return True

    def release(self, *names):
        self.lock()
        self.logger.debug('Releasing following resources: %s', ','.join(names))

        for name in names:
            res = self.get_resource(name)
            res.busy = False

        self.unlock()
Example #48
class Client(object):
    class PendingCall(object):
        def __init__(self, id, method, args=None):
            self.id = id
            self.method = method
            self.args = args
            self.result = None
            self.error = None
            self.completed = Event()
            self.callback = None

    class SubscribedEvent(object):
        def __init__(self, name, *filters):
            self.name = name
            self.refcount = 0
            self.filters = filters

        def match(self, name, args):
            if self.name != name:
                return False

            if self.filters:
                return match(args, *self.filters)

    def __init__(self):
        self.pending_calls = {}
        self.pending_events = []
        self.event_handlers = {}
        self.rpc = None
        self.event_callback = None
        self.error_callback = None
        self.rpc_callback = None
        self.receive_thread = None
        self.token = None
        self.event_distribution_lock = RLock()
        self.event_emission_lock = RLock()
        self.default_timeout = 20
        self.scheme = None
        self.transport = None
        self.parsed_url = None
        self.last_event_burst = None
        self.use_bursts = False
        self.event_cv = Event()
        self.event_thread = None

    def __pack(self, namespace, name, args, id=None):
        return dumps({
            'namespace': namespace,
            'name': name,
            'args': args,
            'id': str(id if id is not None else uuid.uuid4())
        })

    def __call_timeout(self, call):
        pass

    def __call(self, pending_call, call_type='call', custom_payload=None):
        if custom_payload is None:
            payload = {
                'method': pending_call.method,
                'args': pending_call.args,
            }
        else:
            payload = custom_payload

        self.__send(self.__pack(
            'rpc',
            call_type,
            payload,
            pending_call.id
        ))

    def __send_event(self, name, params):
        self.__send(self.__pack(
            'events',
            'event',
            {'name': name, 'args': params}
        ))

    def __send_event_burst(self):
        with self.event_emission_lock:
            self.__send(self.__pack(
                'events',
                'event_burst',
                {'events': list([{'name': t[0], 'args': t[1]} for t in self.pending_events])},
            ))

            del self.pending_events[:]

    def __send_error(self, id, errno, msg, extra=None):
        payload = {
            'code': errno,
            'message': msg
        }

        if extra is not None:
            payload.update(extra)

        self.__send(self.__pack('rpc', 'error', id=id, args=payload))

    def __send_response(self, id, resp):
        self.__send(self.__pack('rpc', 'response', id=id, args=resp))

    def __send(self, data):
        debug_log('<- {0}', data)
        self.transport.send(data)

    def recv(self, message):
        if isinstance(message, bytes):
            message = message.decode('utf-8')
        debug_log('-> {0}', message)
        try:
            msg = loads(message)
        except ValueError as err:
            if self.error_callback is not None:
                self.error_callback(ClientError.INVALID_JSON_RESPONSE, err)
            return

        self.decode(msg)

    def __process_event(self, name, args):
        self.event_distribution_lock.acquire()
        if name in self.event_handlers:
            for h in self.event_handlers[name]:
                h(args)

        if self.event_callback:
            self.event_callback(name, args)

        self.event_distribution_lock.release()

    def __event_emitter(self):
        while True:
            self.event_cv.wait()

            while len(self.pending_events) > 0:
                time.sleep(0.1)
                with self.event_emission_lock:
                    self.__send_event_burst()

    def wait_forever(self):
        if os.getenv("DISPATCHERCLIENT_TYPE") == "GEVENT":
            import gevent
            while True:
                gevent.sleep(60)
        else:
            while True:
                time.sleep(60)

    def drop_pending_calls(self):
        message = "Connection closed"
        for key, call in list(self.pending_calls.items()):  # copy: entries are deleted while iterating
            call.result = None
            call.error = {
                "code":  errno.ECONNABORTED,
                "message": message
            }
            call.completed.set()
            del self.pending_calls[key]

    def decode(self, msg):
        if 'namespace' not in msg:
            self.error_callback(ClientError.INVALID_JSON_RESPONSE)
            return

        if 'name' not in msg:
            self.error_callback(ClientError.INVALID_JSON_RESPONSE)
            return

        if msg['namespace'] == 'events' and msg['name'] == 'event':
            args = msg['args']
            t = spawn_thread(target=self.__process_event, args=(args['name'], args['args']))
            t.start()
            return

        if msg['namespace'] == 'events' and msg['name'] == 'event_burst':
            args = msg['args']
            for i in args['events']:
                t = spawn_thread(target=self.__process_event, args=(i['name'], i['args']))
                t.start()
            return

        if msg['namespace'] == 'events' and msg['name'] == 'logout':
            self.error_callback(ClientError.LOGOUT)
            return

        if msg['namespace'] == 'rpc':
            if msg['name'] == 'call':
                if self.rpc is None:
                    self.__send_error(msg['id'], errno.EINVAL, 'Server functionality is not supported')
                    return

                if 'args' not in msg:
                    self.__send_error(msg['id'], errno.EINVAL, 'Malformed request')
                    return

                args = msg['args']
                if 'method' not in args or 'args' not in args:
                    self.__send_error(msg['id'], errno.EINVAL, 'Malformed request')
                    return

                def run_async(msg, args):
                    try:
                        result = self.rpc.dispatch_call(args['method'], args['args'], sender=self)
                    except rpc.RpcException as err:
                        self.__send_error(msg['id'], err.code, err.message)
                    else:
                        self.__send_response(msg['id'], result)

                t = spawn_thread(target=run_async, args=(msg, args))
                t.start()
                return

            if msg['name'] == 'response':
                if msg['id'] in self.pending_calls.keys():
                    call = self.pending_calls[msg['id']]
                    call.result = msg['args']
                    call.completed.set()
                    if call.callback is not None:
                        call.callback(msg['args'])

                    del self.pending_calls[str(call.id)]
                else:
                    if self.error_callback is not None:
                        self.error_callback(ClientError.SPURIOUS_RPC_RESPONSE, msg['id'])

            if msg['name'] == 'error':
                if msg['id'] in self.pending_calls.keys():
                    call = self.pending_calls[msg['id']]
                    call.result = None
                    call.error = msg['args']
                    call.completed.set()
                    del self.pending_calls[str(call.id)]
                if self.error_callback is not None:
                    self.error_callback(ClientError.RPC_CALL_ERROR)

    def parse_url(self, url):
        self.parsed_url = urlsplit(url, scheme="http")
        self.scheme = self.parsed_url.scheme

    def connect(self, url, **kwargs):
        self.parse_url(url)
        if not self.scheme:
            self.scheme = kwargs.get('scheme', "ws")
        else:
            if 'scheme' in kwargs:
                raise ValueError('Connection scheme cannot be declared in both url and arguments.')
        if self.scheme == "http":
            self.scheme = "ws"

        builder = ClientTransportBuilder()
        self.transport = builder.create(self.scheme)
        self.transport.connect(self.parsed_url, self, **kwargs)
        debug_log('Connection opened, local address {0}', self.transport.address)

        if self.use_bursts:
            self.event_thread = spawn_thread(target=self.__event_emitter, args=())
            self.event_thread.start()

    def login_user(self, username, password, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth', custom_payload={'username': username, 'password': password})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        self.token = call.result[0]

    def login_service(self, name, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth_service', custom_payload={'name': name})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

    def login_token(self, token, timeout=None):
        call = self.PendingCall(uuid.uuid4(), 'auth')
        self.pending_calls[str(call.id)] = call
        self.__call(call, call_type='auth_token', custom_payload={'token': token})
        call.completed.wait(timeout)
        if call.error:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        self.token = call.result[0]

    def disconnect(self):
        debug_log('Closing connection, local address {0}', self.transport.address)
        self.transport.close()

    def enable_server(self):
        self.rpc = rpc.RpcContext()

    def on_event(self, callback):
        self.event_callback = callback

    def on_call(self, callback):
        self.rpc_callback = callback

    def on_error(self, callback):
        self.error_callback = callback

    def subscribe_events(self, *masks):
        self.__send(self.__pack('events', 'subscribe', masks))

    def unsubscribe_events(self, *masks):
        self.__send(self.__pack('events', 'unsubscribe', masks))

    def register_service(self, name, impl):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.rpc.register_service_instance(name, impl)
        self.call_sync('plugin.register_service', name)

    def unregister_service(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.rpc.unregister_service(name)
        self.call_sync('plugin.unregister_service', name)

    def resume_service(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.resume_service', name)

    def register_schema(self, name, schema):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.register_schema', name, schema)

    def unregister_schema(self, name):
        if self.rpc is None:
            raise RuntimeError('Call enable_server() first')

        self.call_sync('plugin.unregister_schema', name)

    def call_async(self, name, callback, *args):
        call = self.PendingCall(uuid.uuid4(), name, args)
        call.callback = callback
        self.pending_calls[str(call.id)] = call
        self.__call(call)

    def call_sync(self, name, *args, **kwargs):
        timeout = kwargs.pop('timeout', self.default_timeout)
        call = self.PendingCall(uuid.uuid4(), name, args)
        self.pending_calls[str(call.id)] = call
        self.__call(call)

        if not call.completed.wait(timeout):
            if self.error_callback:
                self.error_callback(ClientError.RPC_CALL_TIMEOUT, method=call.method, args=call.args)

            raise rpc.RpcException(errno.ETIMEDOUT, 'Call timed out')

        if call.result is None and call.error is not None:
            raise rpc.RpcException(
                call.error['code'],
                call.error['message'],
                call.error['extra'] if 'extra' in call.error else None)

        return call.result

    def call_task_sync(self, name, *args):
        tid = self.call_sync('task.submit', name, args)
        self.call_sync('task.wait', tid, timeout=3600)
        return self.call_sync('task.status', tid)

    def submit_task(self, name, *args):
        return self.call_sync('task.submit', name, args)

    def emit_event(self, name, params):
        if not self.use_bursts:
            self.__send_event(name, params)
        else:
            self.pending_events.append((name, params))
            self.event_cv.set()
            self.event_cv.clear()

    def register_event_handler(self, name, handler):
        if name not in self.event_handlers:
            self.event_handlers[name] = []

        self.event_handlers[name].append(handler)
        self.subscribe_events(name)
        return handler

    def unregister_event_handler(self, name, handler):
        self.event_handlers[name].remove(handler)

    def exec_and_wait_for_event(self, event, match_fn, fn, timeout=None):
        done = Event()
        self.subscribe_events(event)
        self.event_distribution_lock.acquire()

        try:
            fn()
        except:
            self.event_distribution_lock.release()
            raise

        def handler(args):
            if match_fn(args):
                done.set()

        self.register_event_handler(event, handler)
        self.event_distribution_lock.release()
        done.wait(timeout=timeout)
        self.unregister_event_handler(event, handler)

    def test_or_wait_for_event(self, event, match_fn, initial_condition_fn, timeout=None):
        done = Event()
        self.subscribe_events(event)
        self.event_distribution_lock.acquire()

        if initial_condition_fn():
            self.event_distribution_lock.release()
            return

        def handler(args):
            if match_fn(args):
                done.set()

        self.register_event_handler(event, handler)
        self.event_distribution_lock.release()
        done.wait(timeout=timeout)
        self.unregister_event_handler(event, handler)

    def get_lock(self, name):
        self.call_sync('lock.init', name)
        return rpc.ServerLockProxy(self, name)
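
A minimal usage sketch for the client class above, shown here under the assumed name Client (the real class name comes from the surrounding module); the URL, credentials, event mask and RPC method name are illustrative only:

# Hypothetical usage sketch; `Client` stands in for the class defined above.
c = Client()
c.connect('ws://127.0.0.1:5000/socket')          # scheme can also be passed via kwargs
c.login_user('root', 'secret', timeout=10)       # raises rpc.RpcException on failure

def on_volume_changed(args):
    print('volume changed: {0}'.format(args))

c.register_event_handler('volume.changed', on_volume_changed)   # also subscribes the mask
result = c.call_sync('system.info.version', timeout=30)
c.disconnect()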
Exemple #49
0
class Crawler(object):
    """定向爬虫类"""

    http_debuglevel = 0

    #: Predefined page encoding.
    encoding = None

    #: User Agent; sometimes impersonating the Google Bot pays off.
    user_agent = 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'

    # Page language; some sites key their internationalization off this header.
    accept_language = 'zh_CN'

    # Acceptable content types.
    accept_mine = 'text/html,application/xhtml+xml,' \
                  'application/xml;q=0.9,*/*;q=0.8'

    #: Maximum number of redirects, to avoid redirect traps.
    max_redirects = 20

    #: Maximum number of concurrent connections per crawler.
    max_connections = 10

    #: Timeout.
    timeout = 360

    #: Maximum number of retries on failure.
    max_retries = 1000

    #: Sleep interval that grows with every retry.
    #: For example, with ``sleep_seconds = 2`` the first failed connection sleeps 2 seconds,
    #: the second 4 seconds and the third 6 seconds.
    sleep_seconds = 1

    #: Bloom filter capacity.
    bloom_capacity = 10000000

    #: Expected Bloom filter error rate.
    bloom_error_rate = 0.0001

    #: HTTP proxies.
    proxies = None

    #: Directory where error dumps are stored.
    dump_dir = 'dump/'
    is_stop = True
    stopped = False
    name = None

    retry_with_broken_content = False
    retry_with_no_content = False

    #: Server error codes that should be treated as normal pages.
    ignore_server_error_code = ()

    #: Server error codes that should not be retried and are simply ignored.
    do_not_retry_with_server_error_code = ()

    lock = None
    logger = logging.getLogger('Crawler')


    def __init__(self):
        httplib.HTTPConnection.debuglevel = self.http_debuglevel
        self.network = NetworkManager(crawler=self)

        self.pool = Pool()
        self.lock = RLock()
        self.bloom_filters = {}
        self.name = self.__class__.__name__
        self._status = {
            'process_count': 0,
            'is_stop': True,
            'run_seconds': 0,
            'crawler_name': self.name,
        }



    # def sync_bloom(self):
    #     """Force-sync the Bloom filters to file."""
    #     while not self.is_stop:
    #         for key in self.bloom_filters.keys():
    #             self.bloom_filters[key].sync()
    #         gevent.sleep(1)



    def work(self):
        """启动爬虫。"""

        if self.lock.acquire(blocking=False):
            self.logger.info('Starting crawler %s' % self.name)
            self.stopped = False
            self._status['is_stop'] = False
            self.pool.spawn(self.run)
            self.pool.join()
            self.network.join()
            self._status['is_stop'] = True
            self.logger.info('Finished crawler %s' % self.name)
            self.lock.release()

    def on_server_error(self, response):
        """服务器错误回调。

        :param response:
        :raise ServerError:
        """
        self.logger.warning('Something wrong with server.')
        raise ServerError('Error Code:%s' % response.status_code)


    def on_proxies_error(self, proxy):
        pass

    def on_parse_error(self, error):
        """页面分析错误回调

        :param error:
        """

    def fetch_proxies(self):
        pass

    def stop(self):
        """停止爬虫。


        """
        self.logger.info('Stopping crawler %s' % self.name)
        self.stopped = True
        while not self.network._request_queue.empty():
            self.network._request_queue.get()


    def status(self):
        """返回爬虫状态。


        :return: :rtype:
        """
        return self._status


    def run(self):
        """这里编写启动爬虫的工作。
        必须重载此函数,推倒第一块多米诺骨牌。

        """
        raise NotImplementedError
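
A minimal subclass sketch for the Crawler above. run() has to be overridden to tip the first domino; the fetching plumbing (NetworkManager) is project specific, so the body below only logs, and the class attribute values are illustrative:

# Hypothetical subclass; attribute values are for illustration only.
class NewsCrawler(Crawler):
    encoding = 'utf-8'
    max_connections = 5
    sleep_seconds = 2          # back off 2s, 4s, 6s, ... between retries

    def run(self):
        # Queue the seed requests through self.network here
        # (the NetworkManager API is not shown in this example).
        self.logger.info('seeding start URLs')

NewsCrawler().work()           # acquires the lock, spawns run(), joins the pools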
Exemple #50
0
class DataMgr(Greenlet):
    pickle_names = ['_msgs', '_users', 'send_queue', 'pending_online_users']
    data_version = 1000

    def __init__(self, logger):
        Greenlet.__init__(self)
        self.logger = logger
        self._users_lock = RLock()
        self._msgs = {}
        self._users = {}
        self.send_queue = Queue()
        self.pending_online_users = Queue()
        self.bootstrap()
        self._dying = False
        self.start()

    def bootstrap(self):
        """Restore data from disk"""
        _ = opath.join(DATA_DIR, DM_PKL_NAME)
        if opath.exists(_):
            _ = pickle.load(file(_, 'rb'))
            if '_version' not in _ or _['_version'] != DataMgr.data_version:
                raise Exception(" pkl file mismatch:program(%d) file(%d)" % (DataMgr.data_version, None if '_version' not in _ else _['_version']))
            self.__dict__.update(_)

    def shutdown(self):
        """Save data to disk"""
        self._dying = True
        self.logger.debug('[DM] saving data to disk...')
        self._save_cache()

    def reset(self):
        """reset in-memory data and disk data"""
        self.send_queue = Queue()
        self.pending_online_users = Queue()
        _ = opath.join(DATA_DIR, DM_PKL_NAME)
        if opath.exists(_):
            os.remove(_)

    def _save_cache(self):
        # fixme: save to external database not implemented
        _ = {'_version':DataMgr.data_version}
        for k in DataMgr.pickle_names:
            if k in self.__dict__:
                _[k] = self.__dict__[k]
        #pickle.dump(_, file(opath.join(DATA_DIR, DM_PKL_NAME), 'wb'), pickle.HIGHEST_PROTOCOL)

    def msg_add(self, msg):
        """add message to msg_queue

            :param msg: msg to add
            :type msg: MessageObj
        """
        if not isinstance(msg, MessageObj):
            raise ValueError(" argument is not a MessageObj")
        self._msgs[msg.msgid] = msg

    def msg_get(self, msgid):
        """get message by msgid

            :param msgid: message id
            :type msgid: int
        """
        if msgid not in self._msgs:
            raise IndexError(" msgid %s not in queue" % idx)
        return self._msgs[msgid]

    def msg_del(self, msgid):
        """del message by msgid

            :param msgid: message id
            :type msgid: int
        """
        del self._msgs[msgid]

    def msg_set(self, msgid, msg):
        self._msgs[msgid] = msg

    @property
    def msg_count(self):
        """get message queue length
        """
        return len(self._msgs)
    
    def set_user_online(self, guid):
        """set user to online

        this will generate a UserObj instance

            :param guid: user guid
            :type guid: int
        """
        #TODO get userid from rid
        uid = "u" + guid
        u = UserObj(uid, guid)
        self.users_add(u)
        self.pending_online_users.put(guid)

    def set_user_offline(self, guid):
        """set user to offline

            :param guid: user guid
        """
        #TODO get userid from rid
        self.users_del(guid)

    def users_add(self, u):
        """add a user instance to user queue

            :param u: user instance
            :type u: UserObj
        """
        if not isinstance(u, UserObj):
            raise ValueError(" argument is not a UserObj")
        self._users_lock.acquire()
        self._users[u.guid] = u
        self._users_lock.release()

    def users_get(self, guid):
        """get user by guid

            :param guid: user guid
        """
        if guid not in self._users:
            raise IndexError(" guid %s not in users list" % guid)
        return self._users[guid]

    def users_del(self, guid):
        """del user by guid

            :param guid: user guid
        """
        if '-' in guid:  # convert to bytes
            guid = binascii.unhexlify(guid)
        if guid not in self._users:
            raise IndexError(" guid %s not in users list" % guid)
        self._users_lock.acquire()
        del self._users[guid]
        self._users_lock.release()

    @property
    def users_count(self):
        """get user queue length
        """
        return len(self._users)

    def make_bundle(self, send_func, user_keys = None):
        """make bundle and call send_func

            :param send_func: the function to call on generated bundles
            :type send_func: lambda, function, instancemethod
            :param user_keys: user guid list to do the match func
            :type user_keys: list

        """
        user_keys = user_keys or self._users.keys()
        self.logger.debug('[DM] begin mapping of %du * %dm' % (len(user_keys), self.msg_count))
        cnt = 0
        user_keys = sorted(user_keys, key = lambda x:self._users[x].pr, reverse = True)
        for k in user_keys:
            u = self._users[k]
            for _k, m in self._msgs.iteritems():
                _ = u.gen_bundle(m)
                if _:
                    cnt += 1
                    send_func(_)
        if cnt:
            self.logger.debug('[DM] queued %d new bundles' % cnt)
        return cnt


    def run(self):
        """the background thread that automatically do n*m mapping
        """
        self.mongo_instance = mongo()
        while not self._dying:
            msgids = self.mongo_instance.event_get_id(0)
            for i in msgids:
                # generate new MessageObj instance
                m = MessageObj(
                    payload_callback = lambda d = i:self.mongo_instance.event_get_single_info(d),
                    msgid = i
                )
                self.msg_add(m)
            gevent.sleep(60)
            self._save_cache()
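
A sketch of how the n*m mapping above is typically driven, assuming an already constructed DataMgr instance dm; MessageObj, UserObj and the guid value come from elsewhere in the project, and the queue mirrors the send_queue pattern used by DataMgr itself:

# Hypothetical driver; `dm` is an existing DataMgr instance.
outgoing = Queue()                                 # gevent.queue.Queue, as above

def send_push(bundle):
    # every bundle produced by UserObj.gen_bundle() is queued for delivery
    outgoing.put(bundle)

queued = dm.make_bundle(send_push)                 # full users x messages pass

dm.set_user_online(guid)                           # guid is a hypothetical user id
queued = dm.make_bundle(send_push, user_keys=[guid])   # targeted m*1 pass
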
class Balancer(object):
    def __init__(self, dispatcher):
        self.dispatcher = dispatcher
        self.task_list = []
        self.task_queue = Queue()
        self.resource_graph = dispatcher.resource_graph
        self.queues = {}
        self.threads = []
        self.executors = []
        self.logger = logging.getLogger('Balancer')
        self.dispatcher.require_collection('tasks', 'serial', type='log')
        self.create_initial_queues()
        self.start_executors()
        self.distribution_lock = RLock()
        self.debugger = None
        self.debugged_tasks = None
        self.dispatcher.register_event_type('task.changed')

        # Lets try to get `EXECUTING|WAITING|CREATED` state tasks
        # from the previous dispatcher instance and set their
        # states to 'FAILED' since they are no longer running
        # in this instance of the dispatcher
        for stale_task in dispatcher.datastore.query('tasks', ('state', 'in', ['EXECUTING', 'WAITING', 'CREATED'])):
            self.logger.info('Stale Task ID: {0} Name: {1} being set to FAILED'.format(
                stale_task['id'],
                stale_task['name']
            ))

            stale_task.update({
                'state': 'FAILED',
                'error': {
                    'message': 'dispatcher process died',
                    'code': errno.EINTR,
                }
            })

            dispatcher.datastore.update('tasks', stale_task['id'], stale_task)

    def create_initial_queues(self):
        self.resource_graph.add_resource(Resource('system'))

    def start_executors(self):
        for i in range(0, self.dispatcher.configstore.get('middleware.executors_count')):
            self.logger.info('Starting task executor #{0}...'.format(i))
            self.executors.append(TaskExecutor(self, i))

    def start(self):
        self.threads.append(gevent.spawn(self.distribution_thread))
        self.logger.info("Started")

    def schema_to_list(self, schema):
        return {
            'type': 'array',
            'items': schema,
            'minItems': sum([1 for x in schema if 'mandatory' in x and x['mandatory']]),
            'maxItems': len(schema)
        }

    def verify_schema(self, clazz, args):
        if not hasattr(clazz, 'params_schema'):
            return []

        schema = self.schema_to_list(clazz.params_schema)
        val = validator.DefaultDraft4Validator(schema, resolver=self.dispatcher.rpc.get_schema_resolver(schema))
        return list(val.iter_errors(args))

    def submit(self, name, args, sender, env=None):
        if name not in self.dispatcher.tasks:
            self.logger.warning("Cannot submit task: unknown task type %s", name)
            raise RpcException(errno.EINVAL, "Unknown task type {0}".format(name))

        task = Task(self.dispatcher, name)
        task.user = sender.user.name
        task.session_id = sender.session_id
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.args = copy.deepcopy(args)

        if env:
            if not isinstance(env, dict):
                raise ValueError('env must be a dict')

            task.environment = copy.deepcopy(env)

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.set_state(TaskState.CREATED)
        self.task_queue.put(task)
        self.logger.info("Task %d submitted (type: %s, class: %s)", task.id, name, task.clazz)
        return task.id

    def verify_subtask(self, parent, name, args):
        clazz = self.dispatcher.tasks[name]
        instance = clazz(self.dispatcher, self.dispatcher.datastore)
        return instance.verify(*args)

    def run_subtask(self, parent, name, args):
        args = list(args)
        task = Task(self.dispatcher, name)
        task.created_at = datetime.utcnow()
        task.clazz = self.dispatcher.tasks[name]
        task.args = args
        task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
        task.instance.verify(*task.args)
        task.id = self.dispatcher.datastore.insert("tasks", task)
        task.parent = parent

        if self.debugger:
            for m in self.debugged_tasks:
                if fnmatch.fnmatch(name, m):
                    task.debugger = self.debugger

        task.set_state(TaskState.CREATED)
        self.task_list.append(task)
        # If we actually have a non `None` parent task then, add
        # the current subtask to the parent task's subtasks list too
        if parent is not None:
            parent.subtask_ids.append(task.id)
        task.start()
        return task

    def join_subtasks(self, *tasks):
        for i in tasks:
            i.join()

    def abort(self, id):
        task = self.get_task(id)
        if not task:
            self.logger.warning("Cannot abort task: unknown task id %d", id)
            return

        success = False
        if task.started_at is None:
            success = True
        else:
            try:
                task.executor.abort()
                # Also try to abort any subtasks that might have been running
                for st in task.subtask_ids:
                    self.abort(st)
            except:
                pass
        if success:
            task.ended.set()
            task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
            self.logger.debug("Task ID: %d, Name: %s aborted by user", task.id, task.name)

    def task_exited(self, task):
        self.resource_graph.release(*task.resources)
        self.schedule_tasks()

    def schedule_tasks(self):
        """
        This function is called when:
        1) any new task is submitted to any of the queues
        2) any task exits

        :return:
        """
        for task in [t for t in self.task_list if t.state == TaskState.WAITING]:
            if not self.resource_graph.can_acquire(*task.resources):
                continue

            self.resource_graph.acquire(*task.resources)
            self.threads.append(task.start())

    def distribution_thread(self):
        while True:
            self.task_queue.peek()
            self.distribution_lock.acquire()
            task = self.task_queue.get()

            try:
                self.logger.debug("Picked up task %d: %s with args %s", task.id, task.name, task.args)

                errors = self.verify_schema(self.dispatcher.tasks[task.name], task.args)
                if len(errors) > 0:
                    errors = list(validator.serialize_errors(errors))
                    self.logger.warning("Cannot submit task {0}: schema verification failed with errors {1}".format(
                        task.name,
                        errors
                    ))
                    raise ValidationException(extra=errors)

                task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
                task.resources = task.instance.verify(*task.args)

                if type(task.resources) is not list:
                    raise ValueError("verify() returned something else than resource list")

            except Exception as err:
                self.logger.warning("Cannot verify task %d: %s", task.id, err)
                task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(err))
                self.task_list.append(task)
                task.ended.set()
                self.distribution_lock.release()

                if not isinstance(err, VerifyException):
                    self.dispatcher.report_error('Task {0} verify() method raised invalid exception', err)

                continue

            task.set_state(TaskState.WAITING)
            self.task_list.append(task)
            self.distribution_lock.release()
            self.schedule_tasks()
            self.logger.debug("Task %d assigned to resources %s", task.id, ','.join(task.resources))

    def assign_executor(self, task):
        for i in self.executors:
            if i.state == WorkerState.IDLE:
                i.checked_in.wait()
                self.logger.info("Task %d assigned to executor #%d", task.id, i.index)
                task.executor = i
                i.state = WorkerState.EXECUTING
                return

        # Out of executors! Need to spawn new one
        executor = TaskExecutor(self, len(self.executors))
        self.executors.append(executor)
        executor.checked_in.wait()
        executor.state = WorkerState.EXECUTING
        task.executor = executor
        self.logger.info("Task %d assigned to executor #%d", task.id, executor.index)

    def dispose_executors(self):
        for i in self.executors:
            i.die()

    def get_active_tasks(self):
        return [x for x in self.task_list if x.state in (
            TaskState.CREATED,
            TaskState.WAITING,
            TaskState.EXECUTING)]

    def get_tasks(self, type=None):
        if type is None:
            return self.task_list

        return [x for x in self.task_list if x.state == type]

    def get_task(self, id):
        self.distribution_lock.acquire()
        t = first_or_default(lambda x: x.id == id, self.task_list)
        if not t:
            t = first_or_default(lambda x: x.id == id, self.task_queue.queue)

        self.distribution_lock.release()
        return t

    def get_executor_by_key(self, key):
        return first_or_default(lambda t: t.key == key, self.executors)

    def get_executor_by_sender(self, sender):
        return first_or_default(lambda t: t.conn == sender, self.executors)
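
A minimal sketch of a task class the Balancer above can dispatch. The constructor signature, the optional params_schema (with its 'mandatory' flag) and the contract that verify() returns a list of resource names are taken from the code above; run() is assumed from the wider project and left as a placeholder:

# Hypothetical task type; names and schema values are illustrative.
class RebootTask(object):
    params_schema = [
        {'title': 'delay', 'type': 'integer', 'mandatory': True},
    ]

    def __init__(self, dispatcher, datastore):
        self.dispatcher = dispatcher
        self.datastore = datastore

    def verify(self, delay):
        # resource names the Balancer acquires before scheduling the task
        return ['system']

    def run(self, delay):                  # assumed executor entry point
        pass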
Exemple #52
0
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger("ResourceGraph")
        self.mutex = RLock()
        self.root = Resource("root")
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None):
        with self.mutex:
            if not resource:
                raise ResourceError("Invalid resource")

            if self.get_resource(resource.name):
                raise ResourceError("Resource {0} already exists".format(resource.name))

            self.resources.add_node(resource)
            if not parents:
                parents = ["root"]

            for p in parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError("Invalid parent resource {0}".format(p))

                self.resources.add_edge(node, resource)

    def remove_resource(self, name):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in nx.descendants(self.resources, resource):
                self.resources.remove_node(i)

            self.resources.remove_node(resource)

    def remove_resources(self, names):
        with self.mutex:
            for name in names:
                resource = self.get_resource(name)

                if not resource:
                    return

                for i in nx.descendants(self.resources, resource):
                    self.resources.remove_node(i)

                self.resources.remove_node(resource)

    def update_resource(self, name, new_parents):
        with self.mutex:
            resource = self.get_resource(name)

            if not resource:
                return

            for i in self.resources.predecessors(resource):
                self.resources.remove_edge(i, resource)

            for p in new_parents:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError("Invalid parent resource {0}".format(p))

                self.resources.add_edge(node, resource)

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug("Acquiring following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    raise ResourceError("Resource {0} not found".format(name))

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        raise ResourceError("Cannot acquire, some of dependent resources are busy")

                res.busy = True

    def can_acquire(self, *names):
        if not names:
            return True

        with self.mutex:
            self.logger.log(TRACE, "Trying to acquire following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                if not res:
                    return False

                if res.busy:
                    return False

                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        return False

            return True

    def release(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug("Releasing following resources: %s", ",".join(names))

            for name in names:
                res = self.get_resource(name)
                res.busy = False
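
A short usage sketch for the ResourceGraph above. Resource and ResourceError live elsewhere in the project; the stub below only mimics the name and busy attributes the graph actually touches, and the resource names are illustrative:

# Stub standing in for the project's Resource class (name + busy flag only).
class Resource(object):
    def __init__(self, name):
        self.name = name
        self.busy = False

graph = ResourceGraph()
graph.add_resource(Resource('system'))
graph.add_resource(Resource('disk:ada0'), parents=['system'])

if graph.can_acquire('disk:ada0'):
    graph.acquire('disk:ada0')             # marks the node busy after checking descendants
    try:
        pass                               # hold the resource while working
    finally:
        graph.release('disk:ada0')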
Exemple #53
0
class HttpScannerOutput(object):

    def __init__(self, args):
        # TODO: make separate queues for fast logging
        self.args = args
        self.lock = RLock()

        # Colorama init
        init()
        # Initialise logging
        self._init_logger()
        # Initialise output
        self._init_output()
        # Stats
        self.urls_scanned = 0

    def _init_output(self):
        # Initialise output
        self._init_requests_output()
        self._init_csv()
        self._init_json()
        self._init_dump()
        self._init_db()

    def _init_logger(self):
        """
        Init logger
        :return: None
        """
        if self.args.log_file is not None:
            self.logger = logging.getLogger('httpscan_logger')
            self.logger.setLevel(logging.DEBUG if self.args.debug else logging.INFO)
            handler = logging.FileHandler(self.args.log_file)
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%d.%m.%Y %H:%M:%S'))
            self.logger.addHandler(handler)
        else:
            self.logger = None

    def _init_requests_output(self):
        """
        Init requests library output
        :return: None
        """
        if self.args.debug:
            # Enable requests lib debug output
            HTTPConnection.debuglevel = 5
            packages.urllib3.add_stderr_logger()
            logging.basicConfig()
            logging.getLogger().setLevel(logging.DEBUG)
            requests_log = logging.getLogger("requests.packages.urllib3")
            requests_log.setLevel(logging.DEBUG)
            requests_log.propagate = True
        else:
            # Suppress InsecureRequestWarning: Unverified HTTPS request is being made
            packages.urllib3.disable_warnings()

    def _init_csv(self):
        """
        Initialise CSV output
        :return:
        """
        if self.args.output_csv is None:
            self.csv = None
        else:
            # TODO: check if file exists
            self.csv = writer(open(self.args.output_csv, 'wb'), delimiter=';', quoting=QUOTE_ALL)
            self.csv.writerow(['url', 'status', 'length', 'headers'])

    def _init_json(self):
        """
        Initialise JSON output
        :return: None
        """
        self.json = None if self.args.output_json is None else io.open(self.args.output_json, 'w', encoding='utf-8')

    def _init_dump(self):
        """
        Initialise dump folder
        :return: None
        """
        self.dump = path.abspath(self.args.dump) if self.args.dump is not None else None
        if self.dump is not None and not path.exists(self.dump):
            makedirs(self.dump)

    def _init_db(self):
        """
        Initialise database output. Create database and table if missing.
        :return: None
        """
        if self.args.output_database is None:
            self.engine = None
            return

        # Check and create database if needed
        if not database_exists(self.args.output_database):
            create_database(self.args.output_database, encoding='utf8')

        # Create table
        self.engine = create_engine(self.args.output_database)
        self.metadata = MetaData()
        self.scan_table = Table('httpscan', self.metadata,
                                Column('id', Integer, primary_key=True),
                                Column('url', String),
                                Column('status', Integer),
                                Column('length', Integer),
                                Column('headers', String)
                                )
        self.metadata.create_all(self.engine)

    def write(self, **kwargs):
        spawn(self.write_func, **kwargs)

    def write_func(self, **kwargs):
        # Acquire lock
        self.lock.acquire()

        # Output
        self._display_progress(**kwargs)
        self._write_log(**kwargs)

        # Check for exception
        if kwargs['exception'] is None:
            self._filter_and_write(**kwargs)

        # Release lock
        self.lock.release()

    def _display_progress(self, **kwargs):
        # TODO: add detailed stats
        # Calculate progress
        percentage = '{percent:.2%}'.format(percent=float(self.urls_scanned) / self.args.urls_count)

        # Generate and print colored output
        out = '[%s] [worker:%02i] [%s]\t%s -> status:%i ' % (
            helper.str_now(), kwargs['worker'], percentage, kwargs['url'], kwargs['status'])
        if kwargs['exception'] is not None:
            out += 'error: (%s)' % str(kwargs['exception'])
        else:
            out += 'length: %s' % naturalsize(int(kwargs['length']))
        if kwargs['status'] == 200:
            print(Fore.GREEN + out + Fore.RESET)
        elif 400 <= kwargs['status'] < 500 or kwargs['status'] == -1:
            print(Fore.RED + out + Fore.RESET)
        else:
            print(Fore.YELLOW + out + Fore.RESET)

    def _filter_and_write(self, **kwargs):
        # Filter responses and save responses that are matching ignore, allow rules
        if (self.args.allow is None and self.args.ignore is None) or \
                (self.args.allow is not None and kwargs['status'] in self.args.allow) or \
                (self.args.ignore is not None and kwargs['status'] not in self.args.ignore):
            self._write_csv(**kwargs)
            self._write_json(**kwargs)
            self._write_dump(**kwargs)
            self._write_db(**kwargs)

    def _kwargs_to_params(self, **kwargs):
        return {'url': kwargs['url'], 'status': kwargs['status'], 'length': kwargs['length'],
                'headers': str(kwargs['response'].headers)}

    def _write_log(self, **kwargs):
        # Write to log file
        if self.logger is None:
            return

        out = '[worker:%02i] %s %s %i' % (kwargs['worker'], kwargs['url'], kwargs['status'], kwargs['length'])
        if kwargs['exception'] is None:
            self.logger.info(out)
        else:
            self.logger.error("%s %s" % (out, str(kwargs['exception'])))

    def _write_csv(self, **kwargs):
        if self.csv is not None:
            self.csv.writerow([kwargs['url'], kwargs['status'], kwargs['length'], str(kwargs['response'].headers)])

    def _write_json(self, **kwargs):
        if self.json is None:
            return

        # TODO: bugfix appending json
        self.json.write(unicode(dumps(self._kwargs_to_params(**kwargs), ensure_ascii=False)))

    def _write_dump(self, **kwargs):
        if kwargs['response'] is None or self.dump is None:
            return

        # Generate folder and file path
        parsed = urlparse(kwargs['url'])
        host_folder = path.join(self.dump, parsed.netloc)
        p, f = path.split(parsed.path)
        folder = path.join(host_folder, p[1:])
        if not path.exists(folder):
            makedirs(folder)
        filename = path.join(folder, f)

        # Get all content
        try:
            content = kwargs['response'].content
        except Exception as exception:
            self.write_log('Failed to get content for %s Exception: %s' % (kwargs['url'], str(exception)))
            return

        # Save contents to file
        with open(filename, 'wb') as f:
            f.write(content)

    def _write_db(self, **kwargs):
        if self.engine is None:
            return

        # TODO: check if url exists in table
        params = self._kwargs_to_params(**kwargs)
        self.engine.execute(self.scan_table.insert().execution_options(autocommit=True), params)

    def write_log(self, msg, loglevel=logging.INFO):
        """
        Write message to log file
        :param msg:
        :param loglevel:
        :return: None
        """
        if self.logger is None:
            return

        self.lock.acquire()
        if loglevel == logging.INFO:
            self.logger.info(msg)
        elif loglevel == logging.DEBUG:
            self.logger.debug(msg)
        elif loglevel == logging.ERROR:
            self.logger.error(msg)
        elif loglevel == logging.WARNING:
            self.logger.warning(msg)

        self.lock.release()

    def print_and_log(self, msg, loglevel=logging.INFO):
        # TODO: make separate logging
        print('[%s] %s' % (helper.str_now(), msg))
        self.write_log(msg, loglevel)
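
A minimal sketch of driving HttpScannerOutput on its own, assuming only the args attributes the class reads above; every file and database output is left disabled and the values are illustrative:

# Hypothetical standalone driver for the class above.
from argparse import Namespace
import gevent

args = Namespace(
    log_file=None, debug=False, output_csv=None, output_json=None,
    dump=None, output_database=None, urls_count=1, allow=None, ignore=None,
)

output = HttpScannerOutput(args)
output.write(worker=1, url='http://example.com/', status=200,
             length=1024, exception=None, response=None)
gevent.sleep(0)                    # let the spawned writer greenlet run
output.print_and_log('scan finished')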
Exemple #54
0
class QueueHandler(object):

    def __init__(self, logger, pending_online_users, make_func, send_func):
        """Initialize Queue Handler

            :param logger: logger object
            :type logger: Logger
            :param pending_online_users: online users queue
            :type pending_online_users: gevent.queue
            :param make_func: the function to make bundle
            :type make_func: lambda,instancemethod,function
            :param send_func: the function to send bundle
            :type send_func: lambda,instancemethod,function
        """
        self.alive = True
        self.last_idx = None
        self.logger = logger
        self.pending_online_users = pending_online_users
        self._pause_lock = RLock()
        self._make_func = make_func
        self._send_func = send_func
        #self.daemon = True
        #self.start()

    def shutdown(self):
        self.alive = False
        #put None to notify running thread
        gevent.killall(self.greenlets)

    def run(self):
        self.greenlets = [
            gevent.spawn(self.main_loop),
            gevent.spawn(self.online_loop)
        ]

    def pause(self):
        self._pause_lock.acquire()

    def resume(self):
        self._pause_lock.release()

    @property
    def qsize(self):
        return self.bundle_queue.qsize()

    def main_loop(self):
        while True:
            self._pause_lock.acquire()
            # call DataMgr.make_bundle to make bundle of full m*n map
            # and pass function _send_func(GatewayMgr.send_push) as argument
            self._make_func(self._send_func)
            self._pause_lock.release()
            #TODO sleep longer
            #gevent.sleep(random.random())
            gevent.sleep(MSG_CHECK_INTERV)

    def online_loop(self):
        while True:
            u = self.pending_online_users.get()
            # call DataMgr.make_bundle to make bundle of full m*1 map for specific user
            self._make_func(self._send_func, user_keys = [u])
            # context switch
            gevent.sleep(0)
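
A hypothetical wiring sketch for the QueueHandler above, following the comments in main_loop and online_loop: make_func is DataMgr.make_bundle and send_func is the gateway's push sender; logger, dm and gateway are assumed to exist elsewhere in the project:

qh = QueueHandler(logger, dm.pending_online_users, dm.make_bundle, gateway.send_push)
qh.run()            # spawns main_loop (periodic full mapping) and online_loop

qh.pause()          # hold the periodic mapping while mutating shared state
try:
    dm.reset()
finally:
    qh.resume()

qh.shutdown()       # kills both greenlets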
Exemple #55
0
class ResourceGraph(object):
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)

    def lock(self):
        self.mutex.acquire()

    def unlock(self):
        self.mutex.release()

    @property
    def nodes(self):
        return self.resources.nodes()

    def add_resource(self, resource, parents=None, children=None):
        with self.mutex:
            if not resource:
                raise ResourceError('Invalid resource')
    
            if self.get_resource(resource.name):
                raise ResourceError('Resource {0} already exists'.format(resource.name))
    
            self.resources.add_node(resource)
            if not parents:
                parents = ['root']
    
            for p in parents:
                node = self.get_resource(p)
                if not node:
                    continue
    
                self.resources.add_edge(node, resource)

            for p in children or []:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError('Invalid child resource {0}'.format(p))

                self.resources.add_edge(resource, node)

    def remove_resource(self, name):
        with self.mutex:
            resource = self.get_resource(name)
    
            if not resource:
                return
    
            for i in nx.descendants(self.resources, resource):
                self.resources.remove_node(i)
    
            self.resources.remove_node(resource)

    def remove_resources(self, names):
        with self.mutex:
            for name in names:
                resource = self.get_resource(name)
    
                if not resource:
                    return
    
                for i in nx.descendants(self.resources, resource):
                    self.resources.remove_node(i)
    
                self.resources.remove_node(resource)

    def rename_resource(self, oldname, newname):
        with self.mutex:
            resource = self.get_resource(oldname)

            if not resource:
                return

            resource.name = newname

    def update_resource(self, name, new_parents, new_children=None):
        with self.mutex:
            resource = self.get_resource(name)
    
            if not resource:
                return
    
            for i in self.resources.predecessors(resource):
                self.resources.remove_edge(i, resource)
    
            for p in new_parents:
                node = self.get_resource(p)
                if not node:
                    continue
    
                self.resources.add_edge(node, resource)

            for p in new_children or []:
                node = self.get_resource(p)
                if not node:
                    raise ResourceError('Invalid child resource {0}'.format(p))

                self.resources.add_edge(resource, node)

    def get_resource(self, name):
        f = [i for i in self.resources.nodes() if i.name == name]
        return f[0] if len(f) > 0 else None

    def get_resource_dependencies(self, name):
        res = self.get_resource(name)
        for i, _ in self.resources.in_edges([res]):
            yield i.name

    def acquire(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Acquiring following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                if not res:
                    raise ResourceError('Resource {0} not found'.format(name))
    
                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        raise ResourceError('Cannot acquire, some of dependent resources are busy')
    
                res.busy = True

    def can_acquire(self, *names):
        if not names:
            return True

        with self.mutex:
            self.logger.log(TRACE, 'Trying to acquire following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                if not res:
                    return False
    
                if res.busy:
                    return False
    
                for i in nx.descendants(self.resources, res):
                    if i.busy:
                        return False
    
            return True

    def release(self, *names):
        if not names:
            return

        with self.mutex:
            self.logger.debug('Releasing following resources: %s', ','.join(names))
    
            for name in names:
                res = self.get_resource(name)
                res.busy = False

    def draw(self, path):
        return nx.write_dot(nx.relabel_nodes(self.resources, lambda n: f'"{n.name}"'), path)
Exemple #56
0
    def __init__(self):
        self.logger = logging.getLogger('ResourceGraph')
        self.mutex = RLock()
        self.root = Resource('root')
        self.resources = nx.DiGraph()
        self.resources.add_node(self.root)