Example #1
class NotifyingQueue(Event):
    """ A queue that follows the wait protocol. """

    def __init__(self):
        super(NotifyingQueue, self).__init__()
        self._queue = Queue()

    def put(self, item):
        """ Add new item to the queue. """
        self._queue.put(item)
        self.set()

    def empty(self):
        return self._queue.empty()

    def get(self, block=True, timeout=None):
        """ Removes and returns an item from the queue. """
        value = self._queue.get(block, timeout)
        if self._queue.empty():
            self.clear()
        return value

    def stop(self):
        """ Request a stop event. """
        self.set()
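A short usage sketch (not from the original example; it assumes the NotifyingQueue class above plus `from gevent.event import Event`, `from gevent.queue import Queue` and `import gevent`): a consumer greenlet waits on the Event side of the queue and drains it once something arrives.

import gevent

def consumer(notifying_queue):
    # wait() blocks until put() or stop() sets the underlying Event
    notifying_queue.wait()
    while not notifying_queue.empty():
        print(notifying_queue.get())

queue = NotifyingQueue()
worker = gevent.spawn(consumer, queue)
queue.put('hello')
worker.join()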
Example #2
class MemorySession(Session):
    """
    In-memory session with an outgoing gevent Queue as the message
    store.
    """

    def __init__(self, server, session_id=None):
        super(MemorySession, self).__init__(server, session_id=session_id)
        self.session_id = session_id or str(uuid.uuid4())[:8]
        self.server = server

        self.queue = Queue()

        self.hits = 0
        self.heartbeats = 0
        self.connected = False

    def add_message(self, msg):
        self.queue.put_nowait(msg)

    def get_messages(self, **kwargs):
        timeout = kwargs.get('timeout', None)

        self.incr_hits()

        if self.queue.empty():
            try:
                return self.queue.get(**kwargs)
            except Empty:
                return []
        else:
            accum = []
            try:
                while not self.queue.empty():
                    if timeout:
                        accum.append(self.queue.get(timeout=timeout))
                    else:
                        accum.append(self.queue.get_nowait())
            finally:
                return accum

    def interrupt(self):
        """
        A kill event triggered through a client-accessible endpoint.

        Internal expires will not have is_interrupted() == True
        """
        self.interrupted = True
        self.kill()

    def kill(self):
        self.connected = False

        # Expire only once
        if not self.expired:
            self.expired = True
            self.timeout.set()
Example #3
class WorkQueue(object):
    def __init__(self, worker, start_runner=None, max_work_load=16):
        if not worker:
            raise ValueError('worker must be provided')
        self.worker = worker
        self._start_runner = start_runner
        self._max_work_load = max_work_load
        # do we want to limit the size of the queue?
        self._queue = Queue()
        self._num_enqueues = 0
        self._num_dequeues = 0
        self._runner = Runner(self, self._max_work_load)
    #end __init__

    def enqueue(self, work_item):
        self._queue.put(work_item)
        self._num_enqueues = self._num_enqueues + 1
        self.may_be_start_runner()
    #end enqueue

    def dequeue(self):
        try:
            work_item = self._queue.get_nowait()
        except Empty:
            work_item = None
        else:
            self._num_dequeues = self._num_dequeues + 1
        return work_item
    #end dequeue

    def may_be_start_runner(self):
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return
        self._runner.start()
    #end may_be_start_runner

    def runner_done(self):
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return True
        return False
    #end runner_done

    def is_queue_empty(self):
        if self._queue.empty():
            return True
        return False

    def num_enqueues(self):
        return self._num_enqueues
    #end num_enqueues

    def num_dequeues(self):
        return self._num_dequeues
Example #4
class MatlabConnect(object):
    def __init__(self, headset, path = ''):
        self.headset = headset
        self.dataQueue = Queue()
        self.isRunning = False
        self.sensors = ['F3','FC5', 'AF3', 'F7', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'F8', 'AF4', 'FC6', 'F4'] 
        
        command = os.path.join(path, 'matlab')
        os.system('%s -automation -desktop' % command)

    def get_sensors_info(self):
        """
            Greenlet to get a packet from Emotiv headset.
            Appends new data to the queues that consumers read from
        """
        try:
            while self.isRunning:
                packet = self.headset.dequeue()
                values = [packet.sensors[name]['value'] for name in self.sensors]
                if self.dataQueue is not None:
                    self.dataQueue.put_nowait(values)
                
                gevent.sleep(0)
        except KeyboardInterrupt:
            print ('Read stopped')
            self.isRunning = False
        except Exception as e:
            print ('Read Error: %s' % e)
            self.isRunning = False
        finally:
            print ('Read over')
            self.isRunning = False
            self.headset.close()

    def matlabBridge(self, varName):
        data = None
        while self.isRunning or not self.dataQueue.empty():
            while not self.dataQueue.empty():
                buf = self.dataQueue.get()

                if data is None:
                    data = np.array(buf, dtype=int)
                else:
                    data = np.vstack((data, buf))

            self.session.putvalue(varName, data)
            gevent.sleep(1)
        print ('Matlab over')
Example #5
class Spider:
    def __init__(self, url='', depth=1, threads=4):
        self.url = url
        self.depth = depth
        self.threads = threads
        self.tasks = Queue()
        self.bucket = []

    def run(self):
        self.tasks.put(Task(self.url, self.depth))
        threads = [
            gevent.spawn(self.worker)
            for i in range(self.threads)
        ]
        gevent.joinall(threads)

    def worker(self, worker_id=''):
        while not self.tasks.empty():
            task = self.tasks.get()
            if task.url in self.bucket:
                # NOTE: known bug here
                continue
            self.bucket.append(task.url)
            task.run()
            for t in task.subtasks:
                self.tasks.put_nowait(t)
Example #6
def _run(self):
    utils.log("[%s] parsing site %s" % (self, self.base))

    queue = Queue()
    pool  = Pool(32)
    seed  = 'http://www.amazon.com/best-sellers-books-Amazon/zgbs/books/'
    parsed = set()

    queue.put_nowait((seed, 'seed', 0))

    while True:
        items = []

        while not queue.empty():
            item = queue.get_nowait()
            if item[0] not in parsed:
                items.append(item)
                parsed.add(item[0])

        if 0 == len(items) and 0 == len(pool):
            break

        for item in items:
            pool.spawn(self._parseResultsPage, queue, item[0], item[1], item[2])

        time.sleep(0.01)

    pool.join()
    self._output.put(StopIteration)
Example #7
class Crawler(object):
	def __init__(self, processor):
		self.processor = processor
		self.pool = Pool(self.processor.concurrency)
		self.base_host = urlsplit(self.processor.start_url).hostname
		self.urls = Queue()
		self.urls.put(self.processor.start_url)
		self.visited_urls = set()
		self.visited_urls_lock = RLock()
		self.pages_count = 0
		
	def start(self):
		while True:
			if self.pages_count >= self.processor.max_pages:
				self.urls = Queue()
				break
			try:
				url = self.urls.get_nowait()
				self.pool.wait_available()
				spider = Spider(self, self.processor, url)
				self.pool.start(spider)
				self.pages_count += 1
			except Empty:
				break
		self.pool.join()
		if not self.urls.empty():
			self.start()
Example #8
class Scheduler(object):

    """ Scheduler """

    def __init__(self):
        self.request_filter = RequestFilter()
        self.queue = Queue()

    def enqueue_request(self, request):
        """put request
        """
        if not request.dont_filter \
                and self.request_filter.request_seen(request):
            logger.warn("ignore %s", request.url)
            return
        self.queue.put(request)

    def next_request(self):
        """next request
        """
        if self.queue.empty():
            return None
        return self.queue.get()

    def __len__(self):
        return self.queue.qsize()
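A short usage sketch (an assumption, not from the source): it presumes the Scheduler class above together with its RequestFilter and logger dependencies, and uses a namedtuple as a stand-in for a request object with `url` and `dont_filter` attributes.

from collections import namedtuple

Request = namedtuple('Request', ['url', 'dont_filter'])

scheduler = Scheduler()
scheduler.enqueue_request(Request(url='http://example.com', dont_filter=False))

# drain the scheduler until the queue is empty
while len(scheduler):
    request = scheduler.next_request()
    print('crawl next: %s' % request.url)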
Example #9
class Worker():
    def __init__(self):
        self.threads = []
        self.queue = Queue()

    def long_func(self, th, seed):
        k = 0
        while k < 10000:
            print "LOG: Inside the long function Thread: ", th, " Seed: ", seed
            time.sleep(.1)
        print "LOG: Long function is out of the loop", seed
        self.queue.put_nowait(seed)

    def short_func(self, th, seed):
        print "LOG: Inside the short function Thread:", th, " Seed: ", seed
        self.queue.put_nowait(seed)

    def start(self, seed):
        print "INFO: Initializing the threads..."
        self.threads.append(gevent.spawn(self.long_func, 1, seed))
        gevent.sleep(1)
        self.threads.append(gevent.spawn(self.short_func, 2, seed))
        while self.queue.empty():
            print "INFO: Queue is empty %s" % seed
            gevent.sleep(0)
        raise TaskComplete

    def stop(self):
        gevent.killall(self.threads)
Example #10
class NotifyingQueue(Event):
    def __init__(self, maxsize=None, items=()):
        super().__init__()
        self._queue = Queue(maxsize, items)

    def put(self, item):
        """ Add new item to the queue. """
        self._queue.put(item)
        self.set()

    def get(self, block=True, timeout=None):
        """ Removes and returns an item from the queue. """
        value = self._queue.get(block, timeout)
        if self._queue.empty():
            self.clear()
        return value

    def peek(self, block=True, timeout=None):
        return self._queue.peek(block, timeout)

    def __len__(self):
        return len(self._queue)

    def copy(self):
        """ Copies the current queue items. """
        copy = self._queue.copy()

        result = list()
        while not copy.empty():
            result.append(copy.get_nowait())
        return result
Example #11
def test_async_multi_publish_consume():
    with conn.channel() as channel:
        # first message
        message_body = 'test_async_multi_publish_consume message 1'
        channel.basic_publish(
            exchange='unit_test_room',
            routing_key='user1',
            body=message_body
        )

    recv_queue = Queue()
    rchannel = conn.allocate_channel()
    rchannel.basic_consume(queue='listener1', callback=recv_queue.put)

    resp = recv_queue.get()
    eq_(resp.body, message_body)
    resp.ack()

    assert recv_queue.empty()

    with conn.channel() as channel:
        # second message
        message_body = 'test_async_multi_publish_consume message 2'
        channel.basic_publish(
            exchange='unit_test_room',
            routing_key='user1',
            body=message_body
        )

    resp = recv_queue.get()
    eq_(resp.body, message_body)
    resp.ack()
Example #12
class Worker():
    def __init__(self, inputdict, timeout, outputmode, validation_func):
        self.threads = []
        self.queue = Queue()
        self.inputdict = inputdict
        self.timeout = timeout
        self.outputmode = outputmode
        self.validation_func = validation_func

    def infi(self, th, thm):
        k = 0
        while k < 10000:
            print 'I am in INFI ', th, thm
            time.sleep(.1)
        print "out while infi", thm
        self.queue.put_nowait(thm)

    def test(self, th, thm):
        print "inside test", thm
        self.queue.put_nowait(thm)

    def start(self, thm):
        print "Hii"
        self.threads.append(gevent.spawn(self.infi, 1, thm))
        self.threads.append(gevent.spawn(self.test, 2, thm))
        while self.queue.empty():
            print "queue is empty %s" % thm
            gevent.sleep(0)
        raise TaskComplete

    def stop(self):
        gevent.killall(self.threads)
Example #13
class WebSocketClient(WebSocketBaseClient):
    def __init__(self, url, protocols=None, extensions=None):
        WebSocketBaseClient.__init__(self, url, protocols, extensions)
        self._th = Greenlet(self.run)

        self.messages = Queue()

    def handshake_ok(self):
        self._th.start()

    def received_message(self, message):
        self.messages.put(copy.deepcopy(message))

    def closed(self, code, reason=None):
        # When the connection is closed, put a StopIteration
        # on the message queue to signal there's nothing left
        # to wait for
        self.messages.put(StopIteration)

    def receive(self):
        # If the websocket was terminated and there are no messages
        # left in the queue, return None immediately otherwise the client
        # will block forever
        if self.terminated and self.messages.empty():
            return None
        message = self.messages.get()
        if message is StopIteration:
            return None
        return message
Example #14
def _run(self):
    utils.log("[%s] parsing site %s" % (self, self.base))

    queue = Queue()
    pool  = Pool(64)
    seed  = 'http://www.nytimes.com/best-sellers-books/'

    pool.spawn(self._parseResultsPage, pool, queue, seed, 'current', True)

    while True:
        items = []

        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)

        if 0 == len(items) and 0 == len(pool):
            break

        for item in items:
            pool.spawn(item[0], pool, queue, item[1], item[2], item[3])

        time.sleep(0.01)

    pool.join()
    self._output.put(StopIteration)
Example #15
class Spider:
    def __init__(self, url='', depth=1):
        self.tasks = Queue()
        self.tasks.put(url)
        self.init_url = url or ''
        self.depth = depth or ''

    def run(self):
        threads = [
            gevent.spawn(self.work),
            gevent.spawn(self.work),
            gevent.spawn(self.work),
            gevent.spawn(self.work)
        ]
        gevent.joinall(threads)

    def work(self):
        while not self.tasks.empty():
            page = self.tasks.get()
            p = Page(page, '')
            p.do_request()
            p.parse_content()
            hrefs = p.hrefs

            for href in hrefs:
                self.tasks.put_nowait(href)
Example #16
def _run(self):
    utils.log("[%s] parsing site %s" % (self, self.base))

    queue = Queue()
    pool  = Pool(64)
    seed  = 'http://community.seattletimes.nwsource.com/entertainment/i_results.php?search=venue&type=Restaurant&page=1'

    pool.spawn(self._parseResultsPage, pool, queue, seed, '1', True)

    while True:
        items = []

        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)

        if 0 == len(items) and 0 == len(pool):
            break

        for item in items:
            pool.spawn(item[0], pool, queue, item[1], item[2], item[3])

        time.sleep(0.01)

    pool.join()
    self._output.put(StopIteration)
Example #17
class Actor(Greenlet):
	__metaclass__ = MetaActor
	
	def __init__(self):
		Greenlet.__init__(self)
		self.inbox = Queue()
		Actor.actors.append(self)
		
	def send(self, actor, message):
		actor.inbox.put(message)
		
	def receive(self, message):
		raise NotImplementedError()
		
	@staticmethod
	def wait_actors():
		gevent.joinall(Actor.actors)
		
	def loop(self):
		if not self.inbox.empty():
			self.receive(self.inbox.get())
		gevent.sleep()
		
	def _run(self):
		while self.started:
			self.loop()
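A minimal subclass sketch (assumed, not from the source; it presumes the Actor class above, a MetaActor metaclass that provides the `Actor.actors` registry, and gevent):

class Printer(Actor):
    def receive(self, message):
        # override the receive() hook instead of raising NotImplementedError
        print('got: %s' % message)

printer = Printer()
printer.start()
printer.send(printer, 'ping')   # deliver a message to the printer's own inbox
gevent.sleep(0.1)               # give the actor loop a chance to run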
Example #18
def _run(self):
    utils.log("[%s] parsing site %s" % (self, self.base))

    queue = Queue()
    pool  = Pool(16)
    seed  = 'http://www.awardannals.com/skin/menubar81.html'

    pool.spawn(self._parseIndexPage, pool, queue, seed, 'index')

    while True:
        items = []

        while not queue.empty():
            item = queue.get_nowait()
            items.append(item)

        if 0 == len(items) and 0 == len(pool):
            break

        for item in items:
            pool.spawn(self._parseResultsPage, pool, queue, item[0], item[1], False)

        time.sleep(0.01)

    pool.join()
    self._output.put(StopIteration)
Example #19
def recursive_crawl(url):
    all_urls = set()
    processing_urls = set()
    processed_urls = set()
    task_queue = Queue()
    data_queue = Queue()

    def is_processed(url):
        return url in processed_urls

    def is_processing(url):
        return url in processing_urls

    def mark_processed(url):
        if is_processing(url):
            processing_urls.remove(url)
        if is_processed(url):
            print('Duplicate processed url {}'.format(url))
        else:
            processed_urls.add(url)

    def mark_processing(url):
        processing_urls.add(url)

    def add_to_all(url):
        if url not in all_urls:
            print('Record url {}'.format(url))
            all_urls.add(url)

    mark_processing(url)
    task_queue.put_nowait(url)

    # Start workers
    workers = []
    for i in xrange(10):
        workers.append(
            gevent.spawn(url_worker,
                         i, task_queue, data_queue)
        )
    print('workers', len(workers))

    while processing_urls:
        if data_queue.empty():
            gevent.sleep(0)
            continue

        done_url, hrefs = data_queue.get()

        mark_processed(done_url)

        for sub_url in hrefs:
            add_to_all(sub_url)

            if not is_processed(sub_url) and not is_processing(sub_url):
                mark_processing(sub_url)
                task_queue.put_nowait(sub_url)

    print('Processed', len(processed_urls), 'All', len(all_urls))
    print('Total latency', demo_helpers.TOTAL_LATENCY)
Example #20
class ZMQSummarizedTestResult(ZMQTestResult):
    def __init__(self, args):
        super(ZMQSummarizedTestResult, self).__init__(args)
        self.interval = 1.
        self._data = Queue()
        gevent.spawn_later(self.interval, self._dump_data)

    def push(self, data_type, **data):
        self._data.put_nowait((data_type, data))

    def close(self):
        while not self._data.empty():
            self._dump_data(loop=False)
        self.context.destroy()

    def _dump_data(self, loop=True):
        if self._data.empty() and loop:
            gevent.spawn_later(self.interval, self._dump_data)
            return

        data = {'data_type': 'batch',
                'agent_id': self.agent_id,
                'hostname': get_hostname(),
                'run_id': self.run_id,
                'counts': defaultdict(list)}

        # grabbing what we have
        for _ in range(self._data.qsize()):
            data_type, message = self._data.get()
            data['counts'][data_type].append(message)

        while True:
            try:
                self._push.send(self.encoder.encode(data), zmq.NOBLOCK)
                break
            except zmq.ZMQError as e:
                if e.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                    continue
                else:
                    raise
        if loop:
            gevent.spawn_later(self.interval, self._dump_data)
Example #21
def recursive_crawl(url):
    all_urls = set()
    processing_urls = set()
    processed_urls = set()
    data_queue = Queue()
    gpool = Pool(10)

    def is_processed(url):
        return url in processed_urls

    def is_processing(url):
        return url in processing_urls

    def mark_processed(url):
        if is_processing(url):
            processing_urls.remove(url)
        if is_processed(url):
            print('Duplicate processed url {}'.format(url))
        else:
            processed_urls.add(url)

    def mark_processing(url):
        processing_urls.add(url)

    def add_to_all(url):
        if url not in all_urls:
            print('Record url {}'.format(url))
            all_urls.add(url)

    mark_processing(url)
    fetch_and_extract(url, data_queue)

    while processing_urls:
        if data_queue.empty():
            gevent.sleep(0)
            continue

        done_url, hrefs = data_queue.get()

        mark_processed(done_url)

        for sub_url in hrefs:
            add_to_all(sub_url)

            if not is_processed(sub_url) and not is_processing(sub_url):
                mark_processing(sub_url)
                gpool.spawn(fetch_and_extract, sub_url, data_queue)

    print('Processed', len(processed_urls), 'All', len(all_urls))
    print('Total latency', demo_helpers.TOTAL_LATENCY)
Example #22
class PSPool(object):
    LIFE_TIMES = 60 * 1  # socket lifetime in seconds
    def __init__(self, host, port, max_sock):
        self.host = host
        self.port = port
        self.max_sock = max_sock
        self.socks = Queue(maxsize=max_sock)
        self.threads = {}
        self.sock_times = {}

    def init_sock(self):
        """ 初始化和pp服务器的socket """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((self.host, self.port))
        return sock

    def get(self):
        if self.socks.empty():
            sock = self.init_sock()
            self.sock_times[sock] = time.time()
            return  sock
        return self.socks.get()

    def put(self, sock):
        times = time.time() - self.sock_times[sock]
        if times >= self.LIFE_TIMES or self.socks.full():
            self.free(sock)
            return
        self.socks.put(sock)

    def free(self, sock):
        self.sock_times.pop(sock, None)
        try:
            sock.close()
        except:
            pass

    def __enter__(self):
        cur_thread = getcurrent()
        if cur_thread in self.threads:
            raise ValueError('not support reenter')
        self.threads[cur_thread] = self.get()
        return self.threads[cur_thread]

    def __exit__(self, exc_type, exc_val, exc_tb):
        sock = self.threads.pop(getcurrent())
        if exc_type is None:
            self.put(sock)
        else:
            self.free(sock)
Example #23
def poll(self):
    print ('@poll(%d):started...' % self.instance)
    print ('@poll(%d):creating GQueue.Queue...' % self.instance)
    sessions = GQueue()
    print ('@poll(%d):creating Thread...' % self.instance)
    threads = [gevent.spawn(self.task, netdev,
            sessions) for netdev in self.netdevices]
    gevent.sleep(0)
    gevent.joinall(threads)
    data = {}
    while not sessions.empty():
        data.update(sessions.get())
    self.write(data)
    print ('@poll(%d):Done!' % self.instance)
Example #24
class ResPool():
    def __init__(self):
        self.que = Queue()  # resource pool; a database connection pool can be used the same way
        for i in range(0,2):
            self.que.put(i)

    def empty(self):
        return self.que.empty()

    def get(self):
        return self.que.get()

    def put(self, obj):
        self.que.put(obj)
Example #25
    def _StateIteratorForIdTuples(self, slice_gen):
        """Generator which iterates over State buffers for a given list of ID
    tuples. Separates the list of tuples into batches of a maximum size.

    Args:
      slice_gen - Generator which returns the next list of ID tuples of the
          form (Client ID, Taba Name) to lookup.
    """
        # Split the lookups into batches, and start a background greenlet to
        # retrieve the batches. Use a queue to retrieve results so that they can be
        # processed as soon as they are available, and limit the size of the queue
        # to control memory usage.
        result_queue = Queue(8)

        def _GetBatchWorker():
            while True:

                try:
                    id_slice = slice_gen.next()

                except StopIteration:
                    result_queue.put(None)
                    return

                state_op = self.StateGetBatchGenerator(id_slice)
                if not state_op.success:
                    LOG.error("Error retrieving State batch\n%s" % state_op)
                    result_queue.put(Exception)
                    return

                else:
                    result_queue.put(state_op)

        workers = [gevent.spawn(_GetBatchWorker) for _ in xrange(8)]

        # Extract the results as long as there are unprocessed slices or there are
        # results available.
        while not all([w.ready() for w in workers]) or not result_queue.empty():
            state_op = result_queue.get()

            # Yield the results from this batch.
            if state_op:
                for i, ((client_id, name), state) in enumerate(state_op.response_value):
                    if state is not None:
                        yield ((client_id, name), state)

                    # Yield to other greenlets periodically.
                    if i % 5000 == 0:
                        gevent.sleep(0)
Example #26
class ConnectionPool(object):

    def __init__(self, host, port, maxsize=10, connect_timeout=None,
                 read_timeout=None, factory=lambda x: x):
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        self.size = 0
        self.host = host
        self.port = port
        self.factory = factory
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout

    def get(self):
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except Exception:
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        self.pool.put(item)

    def lose(self, item):
        self.size -= 1
        item.close()

    def closeall(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    def create_connection(self):
        """Create connection to remote host."""
        sock = socket.create_connection((self.host, self.port),
                                        timeout=self.connect_timeout)
        sock.settimeout(self.read_timeout)
        return self.factory(sock)
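A short usage sketch (assumed, not from the source) of checking a raw socket out of this pool and handing it back:

pool = ConnectionPool('example.com', 80, maxsize=4)
conn = pool.get()       # reuses an idle connection or opens a new one
try:
    conn.sendall(b'GET / HTTP/1.0\r\nHost: example.com\r\n\r\n')
    print(conn.recv(1024))
    pool.put(conn)      # hand the connection back for reuse
except Exception:
    pool.lose(conn)     # drop a broken connection so the pool can replace it
    raise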
Example #27
class ConnectionPool(object):

    def __init__(self, connection_cls, maxsize=100, **kwargs):
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self._connection_cls = connection_cls
        self._maxsize = maxsize
        self._pool = Queue()
        self._size = 0
        self._conn_params = kwargs

    def get(self):
        if self._size >= self._maxsize or self._pool.qsize():
            return self._pool.get()
        else:
            self._size += 1
            try:
                return self._connection_cls(**self._conn_params)
            except:
                self._size -= 1
                raise

    def put(self, item):
        self._pool.put(item)

    def closeall(self):
        while not self._pool.empty():
            conn = self._pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self):
        conn = self.get()
        try:
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            raise
        finally:
            if conn is not None and not conn.closed:
                self.put(conn)
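A sketch (again an assumption, not from the source) of the context-manager variant above (which needs its `integer_types` import in scope); `FakeConnection` is a hypothetical stand-in for any connection class exposing `close()` and a `closed` attribute.

class FakeConnection(object):
    def __init__(self, **kwargs):
        self.closed = False

    def close(self):
        self.closed = True

pool = ConnectionPool(FakeConnection, maxsize=10, host='localhost')
with pool.connection() as conn:
    print(conn)   # the connection is returned to the pool on exit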
Example #28
class Crawler:
    """Crawler class"""
    def __init__(self, url, limit):
        """
        Initializes Crawler class
        @param url: Input seed page
        @type limit: number
        @param limit: total no. of urls to fetch
        """
        self.url = url
        self.url_count_limit = limit
        self.tasks = Queue()
        self.counter = 0

    def crawl(self, url):
        """
        Crawler function
        Takes input seed pages, and uses BeautifulSoup module to fetch links inside it
        """
        try:
            data = urllib2.urlopen(url)
            bs = BeautifulSoup(data.read())
            links=bs('a')
    
            for link in links:
                if ('href' in dict(link.attrs)):
                    url=urljoin(url,link['href'])
                if url.find("'") != -1: 
                    continue
                url=url.split('#')[0]
    
                if url[0:4] == 'http':
                    if self.counter < self.url_count_limit:
                        self.tasks.put(url)
                        self.counter += 1
                        logger.info('ADDED: %s' % url)
        except:
            logger.info('ERROR: %s' % url)

    def run(self):
        self.crawl(self.url)
        """For Async handling of each url"""
        while not self.tasks.empty():
            url = self.tasks.get()
            gevent.spawn(self.crawl, url).join()
            logger.info('FETCH: %s' % url)
Example #29
class SimpleDBPool():
    DBPOOL_SIZE=0
    HOST=''
    PORT=0
    USERNAME=''
    PASSWD=''
    DATABASE=''
    def __init__(self):
        self._dbFree =Queue()
        self._inited =False
        
    def Init(self,dbpool_size,host,port,username,passwd,database):
        self.DBPOOL_SIZE=dbpool_size
        self.HOST=host
        self.PORT=port
        self.USERNAME=username
        self.PASSWD=passwd
        self.DATABASE=database
        
        for conn in [pymysql.connect(host=self.HOST,port=self.PORT,user=self.USERNAME,passwd=self.PASSWD,db=self.DATABASE) for i in xrange(0,self.DBPOOL_SIZE)] :
            self._dbFree.put(conn)
        self._inited =True
    def Final(self):
        for conn in self._dbFree :
            conn.close()
        self._inited =False
            
    def Get(self):
        if not self._inited:
            return
        while self._dbFree.empty():
            print 'empty'            
            gevent.sleep(0.1)
        conn =self._dbFree.get()
        return conn
    def Release(self,conn,commit):
        if not self._inited:
            return
        if commit:
            conn.commit()
        else:
            conn.rollback()
        print 'put'
        self._dbFree.put(conn)
        gevent.sleep(0)        
Example #30
class ServerTransport(gevent.Greenlet):

    def __init__(self, client_id, conn, delta=0):
        gevent.Greenlet.__init__(self)
        self.client_id = client_id
        self.conn = conn
        self.delta = delta
        self.queue = Queue()
        self.logger = logging.getLogger('server')
        self.t_0 = 0

    def _run(self):
        #self.send_synchronize()
        while True:
            if not self.queue.empty():
                evnt = self.queue.get()
                if evnt["event_type"] == 1:
                    evnt["sent_at"] = pygame.time.get_ticks()
                    self.t_0 = evnt["sent_at"]
                #self.logger.info("Server before send")
                gevent.sleep(0)
                self.conn.sendall(json.dumps(evnt)+"\n")            # write event to connection
                #self.logger.info("Server after send")
                if evnt["event_type"] == 1:
                    #self.logger.info("Server before recv")
                    gevent.sleep()
                    data = self.conn.recv(1024)                     # wait read
                    #self.logger.info("Server after recv, data: %s", data)
                    self.handle_response(data)                      # handle data
            gevent.sleep(0)
        self.conn.close()
        self.s.close() #todo remove

    def add_event(self, data):
        self.queue.put(data)

    def send_synchronize(self):
        raise NotImplementedError("not implemented")

    def handle_response(self, data):
        raise NotImplementedError("not implemented")
Example #31
class SiteInfo:
    def __init__(self, targets):
        self.conn = sqlite3.connect(user_path + '/db/Rules.db')
        self.cursor = self.conn.cursor()
        self.targets: List = targets
        self.queue = Queue()
        self.protocol: str = 'http://'
        self.headers: Dict = {'User-Agent': user_agent}
        self.results: List = []
        sql = 'select * from fingerprint'
        self.cursor.execute(sql)
        self.rules: Dict = {}
        for item in self.cursor.fetchall():
            self.rules[item[1]] = item[2].split(', ')

        self.title_pattern = "<title.*?>(.*?)</title.*?>"
        self.encoding_pattern = "encoding=[\'|\"]?(.*?)[\'|\"]"

    def enqueue_domain(self):
        for domain in self.targets:
            self.queue.put_nowait(domain)

    def get_title(self, r):
        try:
            encoding = r.encoding
            if not r.text:
                return ''
            if not encoding:
                if re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text,
                              re.I | re.S):
                    encoding = re.findall('charset=[\'|\"]?(.*?)[\'|\"]',
                                          r.text, re.I | re.S)[0]
                elif re.findall(self.encoding_pattern + '?', r.text,
                                re.I | re.S):
                    encoding = re.findall(self.encoding_pattern, r.text,
                                          re.I | re.S)[0]
                else:
                    return ''
            if encoding == 'ISO-8859-1' and re.findall(self.title_pattern,
                                                       r.text, re.I | re.S):
                if re.findall('charset=[\'|\"]?(.*?)[\'|\"]', r.text,
                              re.I | re.S):
                    encoding = re.findall('charset=[\'|\"]?(.*?)[\'|\"]',
                                          r.text, re.I | re.S)[0]
                elif re.findall(self.encoding_pattern + '?', r.text,
                                re.I | re.S):
                    encoding = re.findall(self.encoding_pattern, r.text,
                                          re.I | re.S)[0]
                else:
                    encoding = 'utf-8'
                return re.findall(self.title_pattern, r.text,
                                  re.I | re.S)[0].encode("iso-8859-1").decode(
                                      encoding).encode('utf-8').decode(
                                          'utf-8', errors='ignore')
            elif re.findall(self.title_pattern, r.text,
                            re.I | re.S) and encoding.lower() in [
                                'utf-8', 'gb2312', 'gbk2312', 'gbk'
                            ]:
                return re.findall(self.title_pattern, r.text,
                                  re.I | re.S)[0].strip()
            elif re.findall(self.title_pattern, r.text, re.I | re.S):
                return re.findall(self.title_pattern, r.text,
                                  re.I | re.S)[0].encode(encoding).decode(
                                      'utf-8', errors='ignore').strip()
            else:
                return ''
        except AttributeError:
            return ''
        except LookupError:
            return ''

    def get_info(self):
        while not self.queue.empty():
            domain = self.queue.get()
            result = {
                'domain': domain,
                'title': '',
                'text': '',
                'headers': [],
                'app': []
            }
            try:
                r = requests.get(self.protocol + domain,
                                 timeout=3,
                                 headers=self.headers)
            except requests.exceptions.ConnectTimeout:
                return result
            except requests.exceptions.ReadTimeout:
                continue
            except requests.exceptions.ConnectionError:
                continue

            result['title'] = self.get_title(r)
            result['headers'] = [{
                "key": k,
                "value": v
            } for k, v in r.headers.items()]
            result['text'] = r.text

            for appname, rules in self.rules.items():
                for rule in rules:
                    place, rule = rule.split(':', 1)
                    if place in ['body']:
                        if r.text.find(rule) != -1:
                            result['app'].append(appname)
                            break
                    elif place in ['title']:
                        if re.search(
                                '<title>.*?' + re.escape(rule) + '.*?</title>',
                                r.text):
                            result['app'].append(appname)
                            break
                    elif place in ['header', 'server']:
                        header = ''
                        for key, value in r.headers.items():
                            header += key + ': ' + value + ' '
                        if re.search(re.escape(rule), header, re.I):
                            result['app'].append(appname)
                            break
            self.results.append(result)

    def run(self):
        print("获取页面信息")
        self.enqueue_domain()
        threads = [
            gevent.spawn(self.get_info) for _ in range(siteinfo_thread_num)
        ]
        gevent.joinall(threads)
        return self.results
Example #32
class Recorder(object):
    """
    Recorder class. Producer, consumers and controller greenlets are methods of this class.
    Although not implemented, Recorder should be a Singleton.
    """
    def __init__(self, emotiv, filename):
        self.isRunning = False
        self.isRecording = False

        self.sensors = np.array(['F3','P7','O1','O2','P8','F4'])
        self.PLOT_MIN_Y = 0
        self.PLOT_MAX_Y = 1000

        #### PROTOCOL DEFINITION ####
        self.ITERATIONS = config.RECORDING_ITERATIONS
        self.PERIOD = config.RECORDING_PERIOD # Recording stimulated SSVEP
        self.PAUSE_INTER_RECORDING = 2
        self.STIMULI_PATH = config.STIMULI_PATH
        self.DATA_PATH = config.DATA_PATH
        self.FILENAME = filename
        self.LOW_FREQ = 1
        self.NUM_STIMULI = 3

        self.headset = emotiv
        self.plotQueue = Queue()
        self.recorderQueue = Queue()

    def get_sensors_info(self):
        """
            Greenlet to get a packet from Emotiv headset.
            Appends new data to the queues that consumers read from
        """
        try:
            while self.isRunning:
                buf = np.zeros((config.FS, len(self.sensors)))
                for i in range(len(buf)):
                    packet = self.headset.dequeue()
                    values = [packet.sensors[name]['value'] for name in self.sensors]
                    buf[i] = np.array(values)

                gevent.sleep(0)  # needed because recording could be over
                if self.plotQueue is not None:
                    self.plotQueue.put_nowait(buf)
                if self.recorderQueue is not None and self.isRecording:
                    self.recorderQueue.put_nowait(buf)
                
                gevent.sleep(0)
        except KeyboardInterrupt:
            print ('Read stopped')
            self.isRunning = False
        except Exception as e:
            print ('Read Error: %s' % e)
            self.isRunning = False
        finally:
            print ('Read over')
            self.isRunning = False
            self.headset.close()

    def controller(self):
        """
        Greenlet that controls recording process.
        Performs many iterations of recording response to the stimuli, first left and then right.
        """
        # if self.LOW_FREQ:
        #     TOP = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("64.exe")]
        #     SX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("69.exe")]
        #     DX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("80.exe")]
        # else:
        #     TOP = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("12.exe")]
        #     SX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("13.exe")]
        #     DX = [os.path.join(self.STIMULI_PATH, exe) for exe in os.listdir(self.STIMULI_PATH) if exe.endswith("15.exe")]
        #
        # TOPwindow = Popen(args=TOP)
        # SXwindow = Popen(args=SX)
        # DXwindow = Popen(args=DX)

        stimuliExe = os.path.join(self.STIMULI_PATH, "stimuli_all.exe")
        stimuliWin = Popen(args=stimuliExe)
        gevent.sleep(5)
        try:
            for i in xrange(self.ITERATIONS):
                # TOP
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record TOP: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)

                print ('Start recording TOP')
                self.isRecording = True
                gevent.sleep(self.PERIOD)
                
                self.isRecording = False
                print ('Stop recording TOP')

                # SX
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record SX: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)

                print ('Start recording SX')
                self.isRecording = True
                gevent.sleep(self.PERIOD)
                
                self.isRecording = False
                print ('Stop recording SX')
                
                # DX
                winsound.Beep(1500, 250)
                for i in xrange(self.PAUSE_INTER_RECORDING):
                    print ('Seconds to record DX: %i' % (self.PAUSE_INTER_RECORDING - i))
                    gevent.sleep(1)
                
                print ('Start recording DX')
                self.isRecording = True
                gevent.sleep(self.PERIOD)

                self.isRecording = False
                print ('Stop recording DX')
        except Exception as e:
            print ('Controller error: %s' % e)
            self.isRunning = False
        finally:
            # if TOPwindow is not None:
            #     TOPwindow.kill()
            # if SXwindow is not None:
            #     SXwindow.kill()
            # if DXwindow is not None:
            #     DXwindow.kill()
            if stimuliWin is not None:
                stimuliWin.kill()
            print ('Controller over')
            self.isRunning = False

    def recorder(self):
        """
        Greenlet that store data read from the headset into a numpy array
        """
        data = np.empty( (self.ITERATIONS * self.PERIOD * self.NUM_STIMULI, config.FS, len(self.sensors)) )
        counter = 0
        try:
            while self.isRunning or not self.recorderQueue.empty():
                # Controller greenlets controls the recording
                while self.isRecording or not self.recorderQueue.empty():
                    while not self.recorderQueue.empty():
                        buf = self.recorderQueue.get()

                        data[counter] = buf
                        counter += 1

                    gevent.sleep(1)
                gevent.sleep(0)
        except Exception as e:
            print ('Recorder error: %s' % e)
            self.isRunning = False
        finally:
            print ('Recorder over')
            data = data.reshape((self.ITERATIONS * self.PERIOD * self.NUM_STIMULI * config.FS, len(self.sensors)))
            sio.savemat(os.path.join(self.DATA_PATH, self.FILENAME), {'X' : data})
            self.isRunning = False

    def plot(self, bufferSize = 1000):
        """
            Greenlet that plots y once per .1 s
            The y scale is specified through global config but is dynamically adjusted
        """
        ax = plt.subplot(111)

        canvas = ax.figure.canvas
        plt.grid() # to ensure proper background restore
        background = None

        plotsNum = len(self.sensors)
        buffers = [deque([0]*bufferSize) for i in xrange(plotsNum)]
        lines = [plt.plot(buffers[i], lw=1, label=self.sensors[i]).pop() for i in xrange(plotsNum)]
        plt.legend()
        
        plt.axis([0, bufferSize, self.PLOT_MIN_Y, self.PLOT_MAX_Y])

        try:
            while self.isRunning:
                while not self.plotQueue.empty():
                    # Getting values from queue
                    values = self.plotQueue.get()
                    # Updating buffer
                    for j in range(len(values)):
                        [buffers[i].appendleft(values[j, i]) for i in xrange(plotsNum)]
                        [buffers[i].pop() for i in xrange(plotsNum)]

                if background is None:
                    background = canvas.copy_from_bbox(ax.bbox)
                canvas.restore_region(background)

                # Adjusting Y scale
                minY = min(min(buffers[0:])) - 100
                maxY = max(max(buffers[0:])) + 100
                plt.ylim([minY,maxY])
                
                # Plot refreshes with new buffer
                [lines[i].set_ydata(buffers[i]) for i in xrange(plotsNum)]

                plt.draw()
                plt.pause(0.000001)
                gevent.sleep(1)
        except Exception as e:
            print ('Plot error: %s' % e)
            self.isRunning = False
        finally:
            print ('Plot over')
            self.isRunning = False
Example #33
class Worker(object):
    def __init__(self, workers_number, results_type="extend_list"):
        '''
        array_results: each task function may return a list, and all results
        are collected into array_results.
        If results_type is "extend_list", each result is a list and is
            extended into array_results.
        If it is "add_element", each result is a single element and is
            appended to array_results.
        '''
        self.workers_number = workers_number
        self.tasks = Queue()
        self.array_results = []
        self.results_type = results_type

    def put_tasks(self, all_tasks):
        '''
        The boss put all tasks into queue
        '''
        for one_task in all_tasks:
            self.tasks.put_nowait(one_task)

    def generate_boss(self, all_tasks):
        '''
        Spawn the boss greenlet that puts all tasks onto the queue.
        '''
        self.all_tasks_number = len(all_tasks)
        boss = [gevent.spawn(self.put_tasks, all_tasks)]
        return boss

    def get_tasks(self, worker_id, func, *args, **kwargs):
        '''
        The worker get all tasks from queue, and run the
        corresponding function
        '''
        while not self.tasks.empty():
            task = self.tasks.get()
            progress = self.show_progress()
            ret = func(task, progress, *args, **kwargs)
            if ret and self.results_type == "extend_list":
                self.array_results.extend(ret)
            elif ret and self.results_type == "add_element":
                self.array_results.append(ret)
            progress = self.show_progress()
            print(progress)
            # logger.info("The worker %s has got task %s " % (worker_id, task))

    def generate_workers(self, func, *args, **kwargs):
        '''
        Generate workers array
        '''
        workers = [
            gevent.spawn(self.get_tasks, worker_id, func, *args, **kwargs)
            for worker_id in xrange(1, self.workers_number + 1)
        ]
        return workers

    def joinall(self, boss, workers):
        all_spawns = boss + workers
        gevent.joinall(all_spawns)

    def return_results(self):
        '''
        Return the array results
        '''
        return self.array_results

    def show_progress(self):
        '''
        Show the progress in two ways
        1. current_task / all_task
        2. the percentage
        '''
        self.current_tasks_id = self.tasks.qsize()
        progress_one = '%s/%s' % (self.current_tasks_id, self.all_tasks_number)

        progress_percentage = 1 - float(self.current_tasks_id)\
            / float(self.all_tasks_number)
        progress_two = "%s" % (progress_percentage * 100)
        progress = [progress_one, progress_two]
        return progress

    def pack(self, all_tasks, func, *args, **kwargs):
        '''
        Pack all steps into one function
        '''
        boss = self.generate_boss(all_tasks)
        workers = self.generate_workers(func, *args, **kwargs)
        self.joinall(boss, workers)
        return self.return_results()
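A usage sketch (assumed, not from the source; it presumes the Worker class above with gevent and its Queue import available):

def fetch(task, progress):
    # each return value is a list, matching the default "extend_list" mode
    return [task * 2]

worker = Worker(workers_number=5)
results = worker.pack(list(range(10)), fetch)
print(results)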
Example #34
class ScraperGeventQueue(object):
    def __init__(self, scraperClass, gevent_num=100):
        self.scraperClass = scraperClass

        self.gevent_num = gevent_num
        self.tasks = Queue()

    @property
    def args(self):
        day_list = [('1989-01-01', '1999-12-31', 1)]
        for i in range(2000, 2017):
            for j in range(1, 13):
                begin_day = str(i) + '-' + str(j) + '-01'
                end_day = str(i) + '-' + str(j) + '-' + str(daysInMonth(i, j))
                day_list.append((begin_day, end_day, 1))
        return day_list

    def _run(self, task):
        begin_day, end_day, page_num = task
        scraper = self.scraperClass(begin_day, end_day, page_num)
        if page_num == 1:

            scraper.html = scraper.getHTML()
            totalPage = scraper.getTotalPageNumber()

            if totalPage > 200:
                new_end_day, new_begin_day = aveDay(begin_day, end_day)
                task1 = (begin_day, new_end_day, 1)
                task2 = (new_begin_day, end_day, 1)
                self.tasks.put(task1)
                self.tasks.put(task2)
                writeCSV('overflow_list', *task)
                writeCSV('split_list', *task1)
                writeCSV('split_list', *task2)
            elif totalPage > 1:
                writeCSV('total_page_list', *(begin_day, end_day, totalPage))
                [
                    self.tasks.put_nowait((begin_day, end_day, i))
                    for i in range(2, totalPage + 1)
                ]
                map(print, self.tasks)
            elif totalPage == 1:
                writeCSV('total_page_list', *(begin_day, end_day, totalPage))
            else:
                self.tasks.put(task)
                writeCSV('failure_list', *task)

            if os.path.isfile(scraper.file_name):
                indicator = True
            else:
                indicator = scraper.parseURL(scraper.html)
        else:
            indicator = scraper.start()

        if indicator:
            writeCSV('success_list', *task)
            print(begin_day, end_day, page_num, 'success')
        else:
            self.tasks.put(task)
            writeCSV('failure_list', *task)

    def worker(self):
        while not self.tasks.empty():
            task = self.tasks.get()
            self._run(task)

    def manager(self):
        for arg in self.args:
            self.tasks.put_nowait(arg)

    def start(self):
        gevent.spawn(self.manager).join()
        tasks = [gevent.spawn(self.worker) for i in range(self.gevent_num)]
        gevent.joinall(tasks)
Example #35
class DatabaseConnectionPool(object):

    def __init__(self, maxsize=100):
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        self.size = 0

    def get(self):
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except:
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        self.pool.put(item)

    def closeall(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError("Cannot commit because connection was closed: %r" % (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        isolation_level = kwargs.pop('isolation_level', None)
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn.cursor(*args, **kwargs)
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError("Cannot commit because connection was closed: %r" % (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    def _rollback(self, conn):
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item
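A brief sketch (an assumption, not from the source) of a concrete subclass that supplies create_connection() for a psycopg2-backed pool:

class PostgresConnectionPool(DatabaseConnectionPool):
    def __init__(self, dsn, maxsize=10):
        self.dsn = dsn
        DatabaseConnectionPool.__init__(self, maxsize)

    def create_connection(self):
        # late import so the sketch stays optional
        import psycopg2
        return psycopg2.connect(self.dsn)

pool = PostgresConnectionPool('dbname=test user=postgres', maxsize=10)
print(pool.fetchone('SELECT 1'))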
Example #36
class WebSocket:
    def __init__(self, socket, environ):
        self.socket = socket
        self.version = environ.get("HTTP_SEC_WEBSOCKET_VERSION", None)
        self.path = environ.get("PATH_INFO", None)
        self.origin = environ.get("HTTP_ORIGIN", None)
        self.protocol = environ.get("HTTP_SEC_WEBSOCKET_PROTOCOL", None)
        self.closed = False
        self.status = None
        self._receive_error = None
        self._queue = Queue()
        self.max_length = 10 * 1024 * 1024
        gevent.spawn(self._listen)

    def set_max_message_length(self, length):
        self.max_length = length

    def _listen(self):
        try:
            while True:
                fin = False
                message = bytearray()
                is_first_message = True
                start_opcode = None
                while not fin:
                    payload, opcode, fin = self._get_frame(
                        max_length=self.max_length - len(message))
                    # Make sure continuation frames have correct information
                    if not is_first_message and opcode != 0:
                        self._error(STATUS_PROTOCOL_ERROR)
                    if is_first_message:
                        if opcode not in (OPCODE_TEXT, OPCODE_BINARY):
                            self._error(STATUS_PROTOCOL_ERROR)
                        # Save opcode
                        start_opcode = opcode
                    message += payload
                    is_first_message = False
                message = bytes(message)
                if start_opcode == OPCODE_TEXT:  # UTF-8 text
                    try:
                        message = message.decode()
                    except UnicodeDecodeError:
                        self._error(STATUS_DATA_ERROR)
                self._queue.put(message)
        except Exception as e:
            self.closed = True
            self._receive_error = e
            self._queue.put(None)  # To make sure the error is read

    def receive(self):
        if not self._queue.empty():
            return self.receive_nowait()
        if isinstance(self._receive_error, EOFError):
            return None
        if self._receive_error:
            raise self._receive_error
        self._queue.peek()
        return self.receive_nowait()

    def receive_nowait(self):
        ret = self._queue.get_nowait()
        if self._receive_error and not isinstance(self._receive_error,
                                                  EOFError):
            raise self._receive_error
        return ret

    def send(self, data):
        if self.closed:
            raise EOFError()
        if isinstance(data, str):
            self._send_frame(OPCODE_TEXT, data.encode())
        elif isinstance(data, bytes):
            self._send_frame(OPCODE_BINARY, data)
        else:
            raise TypeError("Expected str or bytes, got " + repr(type(data)))

    # Reads a frame from the socket. Pings, pongs and close packets are handled
    # automatically
    def _get_frame(self, max_length):
        while True:
            payload, opcode, fin = self._read_frame(max_length=max_length)
            if opcode == OPCODE_PING:
                self._send_frame(OPCODE_PONG, payload)
            elif opcode == OPCODE_PONG:
                pass
            elif opcode == OPCODE_CLOSE:
                if len(payload) >= 2:
                    self.status = struct.unpack("!H", payload[:2])[0]
                was_closed = self.closed
                self.closed = True
                if not was_closed:
                    # Send a close frame in response
                    self.close(STATUS_OK)
                raise EOFError()
            else:
                return payload, opcode, fin

    # Low-level function, use _get_frame instead
    def _read_frame(self, max_length):
        header = self._recv_exactly(2)

        if not (header[1] & 0x80):
            self._error(STATUS_POLICY_VIOLATION)

        opcode = header[0] & 0xf
        fin = bool(header[0] & 0x80)

        payload_length = header[1] & 0x7f
        if payload_length == 126:
            payload_length = struct.unpack("!H", self._recv_exactly(2))[0]
        elif payload_length == 127:
            payload_length = struct.unpack("!Q", self._recv_exactly(8))[0]

        # Control frames are handled in a special way
        if opcode in (OPCODE_PING, OPCODE_PONG):
            max_length = 125

        if payload_length > max_length:
            self._error(STATUS_TOO_LONG)

        mask = self._recv_exactly(4)
        payload = self._recv_exactly(payload_length)
        payload = self._unmask(payload, mask)

        return payload, opcode, fin

    def _recv_exactly(self, length):
        buf = bytearray()
        while len(buf) < length:
            block = self.socket.recv(min(4096, length - len(buf)))
            if block == b"":
                raise EOFError()
            buf += block
        return bytes(buf)

    def _unmask(self, payload, mask):
        def gen(c):
            return bytes([x ^ c for x in range(256)])

        payload = bytearray(payload)
        payload[0::4] = payload[0::4].translate(gen(mask[0]))
        payload[1::4] = payload[1::4].translate(gen(mask[1]))
        payload[2::4] = payload[2::4].translate(gen(mask[2]))
        payload[3::4] = payload[3::4].translate(gen(mask[3]))
        return bytes(payload)

    def _send_frame(self, opcode, data):
        for i in range(0, len(data), SEND_PACKET_SIZE):
            part = data[i:i + SEND_PACKET_SIZE]
            fin = int(i == (len(data) - 1) // SEND_PACKET_SIZE *
                      SEND_PACKET_SIZE)
            header = bytes([(opcode if i == 0 else 0) | (fin << 7),
                            min(len(part), 126)])
            if len(part) >= 126:
                header += struct.pack("!H", len(part))
            self.socket.sendall(header + part)

    def _error(self, status):
        self.close(status)
        raise EOFError()

    def close(self, status=STATUS_OK):
        self.closed = True
        try:
            self._send_frame(OPCODE_CLOSE, struct.pack("!H", status))
        except (BrokenPipeError, ConnectionResetError):
            pass
        self.socket.close()
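
A hypothetical echo handler built on the WebSocket class above. It assumes the HTTP upgrade handshake has already been completed by the surrounding gevent-based WSGI server, which hands over the raw socket and the WSGI environ.

def echo_handler(sock, environ):
    ws = WebSocket(sock, environ)
    ws.set_max_message_length(1024 * 1024)  # refuse messages over 1 MiB
    while True:
        msg = ws.receive()   # blocks on the internal gevent Queue
        if msg is None:      # None means the connection is closed (close frame or EOF)
            break
        ws.send(msg)         # str -> text frame, bytes -> binary frame
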
class DatabaseConnectionPool(object):
    """
    - MYSQL_PASSWD must be set in .bashrc or .bash_profile.
    """
    def __init__(self, max_size, auto_commit, fetchiter_size):
        if not isinstance(max_size, int):
            raise TypeError('Expected integer, got %r' % (max_size, ))
        self.max_size = max_size
        self.auto_commit = auto_commit
        self.pool = Queue()
        self.size = 0
        self.fetchiter_size = fetchiter_size

    def get(self):
        if self.pool.qsize() >= self.max_size:
            return self.pool.get()
        else:
            self.size += 1
            try:
                new_conn = self.create_connection()
            except:
                self.size -= 1
                raise
            return new_conn

    def put(self, item):
        self.pool.put(item)

    def close_all(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except:
                pass

    def commit_all(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.commit()
            except:
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if not conn.open:
                conn = None
                self.close_all()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if not conn.open:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
        finally:
            if conn is not None and conn.open:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        try:
            isolation_level = kwargs.pop('isolation_level', None)
            with self.connection(isolation_level) as conn:
                yield conn.cursor(cursorclass=pymysql.cursors.SSDictCursor,
                                  *args,
                                  **kwargs)
        except:
            raise

    def _rollback(self, conn):
        try:
            conn.rollback()
        except:
            gevent.hub.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.rowcount
        except:
            raise

    def executemany(self, *args, **kwargs):
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.executemany(*args)
                return cursor.rowcount
        except:
            raise

    def fetchone(self, *args, **kwargs):
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.fetchone()
        except:
            raise

    def fetchall(self, *args, **kwargs):
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                return cursor.fetchall()
        except:
            raise

    def fetchiter(self, *args, **kwargs):
        try:
            with self.cursor(**kwargs) as cursor:
                cursor.execute(*args)
                while True:
                    items = cursor.fetchmany(size=self.fetchiter_size)
                    if not items:
                        break
                    for item in items:
                        yield item
        except:
            raise
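
A hypothetical usage sketch for the MySQL-backed pool above. get() relies on a create_connection() method that is not shown in this excerpt, so a subclass (or added method) supplying real pymysql connections is assumed; the table and data are illustrative only.

pool = DatabaseConnectionPool(max_size=5, auto_commit=True, fetchiter_size=500)

# execute()/executemany() go through the SSDictCursor-based cursor() manager.
pool.executemany("INSERT INTO prices (sku, price) VALUES (%s, %s)",
                 [('A-1', 10), ('A-2', 12)])

# fetchiter() yields dict rows in batches of fetchiter_size.
for row in pool.fetchiter("SELECT sku, price FROM prices"):
    print(row['sku'], row['price'])
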
Beispiel #38
0
class WorkEngine(object):
    def __init__(self, **kwargs):
        self._module_dict = {}
        self._redis_client = RedisClient(host=config_setting.queue_host,
                                         port=config_setting.queue_port,
                                         password=config_setting.queue_pwd)
        self._secret_key = 'd6f89b09'
        self._wid = unique_machine
        self._queue_list = ['ultron:work:work_id:' + str(self._wid)]
        # generate the token
        self._token = hashlib.sha1(
            (self._secret_key +
             self._wid.replace('-', '')).encode()).hexdigest()
        self._task_queue = Queue()  #
        self.init_modules()
        gevent.spawn(self._get_task)
        gevent.spawn(self._dispatch_task)
        gevent.spawn(self._heart_tick)
        gevent.sleep()

    def init_modules(self):
        l = json.loads(moddules_info)
        for setting in l:
            self.load_modules(setting)

    def load_modules(self, setting):
        name = setting['name']
        is_effective = setting['isEffective']
        if is_effective == 0:
            return
        module_name = 'ultron.cluster.work.extern_modules.' + name + '.module'
        try:
            module = importlib.import_module(module_name)
            if 'Module' in dir(module):
                strategy_class = module.__getattribute__('Module')
                self._module_dict[name] = strategy_class(
                    name, self._wid, self._token, self._redis_client)
                print('module %s loading' % (name))
                if name == 'login':
                    self._module_dict[name].login_master()
        except Exception as e:
            print('Failed to import module:%s:[%s]' % (name, str(e)))

    def _heart_tick(self):
        last_time = datetime.datetime.now()
        while True:
            now_time = datetime.datetime.now()
            if (now_time - last_time).seconds > 20:  # send a heartbeat packet
                task = {'name': 'login', 'opcode': 'heart_tick'}
                self._module_dict[task['name']].process_respone(task)
                last_time = now_time
            gevent.sleep(.3)

    def _get_task(self):
        while True:
            for queue in self._queue_list:
                task_all = self._redis_client.hmgetall(queue)
                task_list = task_all[0]
                self._redis_client.hmdel(queue, task_list.keys())
                for tid, task in task_list.items():
                    self._task_queue.put(json.loads(task))
            gevent.sleep(.3)

    # used to handle login for each node
    def _dispatch_task(self):
        while True:
            while not self._task_queue.empty():
                task = self._task_queue.get()
                space_name = str(task.get('name'))
                if space_name in self._module_dict:
                    self._module_dict[space_name].process_respone(task)
            gevent.sleep(.3)
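
The engine above wires a Redis-backed inbox to an in-process gevent Queue: one greenlet (_get_task) drains tasks from Redis into the queue, a second (_dispatch_task) routes each task to the module named in it, and a third sends heartbeats. Below is a stripped-down, self-contained sketch of that producer/dispatcher shape, with Redis and the module registry replaced by a plain dict of callables.

import gevent
from gevent.queue import Queue

task_queue = Queue()
handlers = {'echo': lambda task: print('handled', task['payload'])}

def produce():
    # stand-in for _get_task(): an external source feeds tasks in
    for i in range(5):
        task_queue.put({'name': 'echo', 'payload': i})
        gevent.sleep(.3)

def dispatch():
    # stand-in for _dispatch_task(): route each task by its 'name'
    while True:
        while not task_queue.empty():
            task = task_queue.get()
            handler = handlers.get(task['name'])
            if handler:
                handler(task)
        gevent.sleep(.3)

gevent.spawn(dispatch)
gevent.spawn(produce).join()
gevent.sleep(.5)  # give the dispatcher time to drain the last items
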
Beispiel #39
0
class TestChannelInt(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

    #@skip('Not working consistently on buildbot')
    def test_consume_one_message_at_a_time(self):
        # end to end test
        #    - Process P1 is producing one message every 5 seconds
        #    - Process P2 is producing one other message every 3 seconds
        #    - Process S creates an auto-delete=False queue without a consumer and without a binding
        #    - Process S binds this queue through a pyon.net or container API call to the topic of process P1
        #    - Process S waits a bit
        #    - Process S checks the number of messages in the queue
        #    - Process S creates a consumer, takes one message off the queue (non-blocking) and destroys the consumer
        #    - Process S waits a bit (let messages accumulate)
        #    - Process S creates a consumer, takes a message off and repeats it until no messages are left (without ever blocking) and destroys the consumer
        #    - Process S waits a bit (let messages accumulate)
        #    - Process S creates a consumer, takes a message off and repeats it until no messages are left (without ever blocking). Then requeues the last message and destroys the consumer
        #    - Process S creates a consumer, takes one message off the queue (non-blocking) and destroys the consumer.
        #    - Process S sends prior message to its queue (note: may be tricky without a subscription to yourself)
        #    - Process S changes the binding of queue to P1 and P2
        #    - Process S removes all bindings of queue
        #    - Process S deletes the queue
        #    - Process S exits without any residual resources in the broker
        #    - Processes P1 and P2 get terminated without any residual resources in the broker
        #
        #    * Show this works with the ACK or no-ACK mode
        #    * Do the above with semi-abstracted calls (some nicer boilerplate)

        def every_five():
            p = self.container.node.channel(PublisherChannel)
            p._send_name = NameTrio(bootstrap.get_sys_name(), 'routed.5')
            counter = 0

            while not self.publish_five.wait(timeout=1):
                p.send('5,' + str(counter))
                counter += 1

        def every_three():
            p = self.container.node.channel(PublisherChannel)
            p._send_name = NameTrio(bootstrap.get_sys_name(), 'routed.3')
            counter = 0

            while not self.publish_three.wait(timeout=0.6):
                p.send('3,' + str(counter))
                counter += 1

        self.publish_five = Event()
        self.publish_three = Event()
        self.five_events = Queue()
        self.three_events = Queue()

        gl_every_five = spawn(every_five)
        gl_every_three = spawn(every_three)

        def listen(lch):
            """
            The purpose of this listen method is to trigger waits in the code below.
            By setting up a listener that subscribes to both 3 and 5, and putting received
            messages into the appropriate gevent-queues client side, we can assume that
            the channel we're actually testing with get_stats etc has had the message delivered
            too.
            """
            lch._queue_auto_delete = False
            lch.setup_listener(
                NameTrio(bootstrap.get_sys_name(), 'alternate_listener'),
                'routed.3')
            lch._bind('routed.5')
            lch.start_consume()

            while True:
                try:
                    newchan = lch.accept()
                    m, h, d = newchan.recv()
                    count = m.rsplit(',', 1)[-1]
                    if m.startswith('5,'):
                        self.five_events.put(int(count))
                        newchan.ack(d)
                    elif m.startswith('3,'):
                        self.three_events.put(int(count))
                        newchan.ack(d)
                    else:
                        raise StandardError("unknown message: %s" % m)

                except ChannelClosedError:
                    break

        lch = self.container.node.channel(SubscriberChannel)
        gl_listen = spawn(listen, lch)

        def do_cleanups(gl_e5, gl_e3, gl_l, lch):
            self.publish_five.set()
            self.publish_three.set()
            gl_e5.join(timeout=5)
            gl_e3.join(timeout=5)

            lch.stop_consume()
            lch._destroy_queue()
            lch.close()
            gl_listen.join(timeout=5)

        self.addCleanup(do_cleanups, gl_every_five, gl_every_three, gl_listen,
                        lch)

        ch = self.container.node.channel(RecvChannel)
        ch._recv_name = NameTrio(bootstrap.get_sys_name(), 'test_queue')
        ch._queue_auto_delete = False

        # #########
        # THIS TEST EXPECTS OLD BEHAVIOR OF NO QOS, SO SET A HIGH BAR
        # #########
        ch._transport.qos_impl(prefetch_count=9999)

        def cleanup_channel(thech):
            thech._destroy_queue()
            thech.close()

        self.addCleanup(cleanup_channel, ch)

        # declare exchange and queue, no binding yet
        ch._declare_exchange(ch._recv_name.exchange)
        ch._declare_queue(ch._recv_name.queue)
        ch._purge()

        # do binding to 5 pub only
        ch._bind('routed.5')

        # wait for one message
        self.five_events.get(timeout=2)

        # ensure 1 message, 0 consumer
        self.assertTupleEqual((1, 0), ch.get_stats())

        # start a consumer
        ch.start_consume()
        time.sleep(0.2)
        self.assertEquals(
            ch._recv_queue.qsize(),
            1)  # should have been delivered to the channel, waiting for us now

        # receive one message with instant timeout
        m, h, d = ch.recv(timeout=0)
        self.assertEquals(m, "5,0")
        ch.ack(d)

        # we have no more messages, should instantly fail
        self.assertRaises(PQueue.Empty, ch.recv, timeout=0)

        # stop consumer
        ch.stop_consume()

        # wait until next 5 publish event
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 1)

        # start consumer again, empty queue
        ch.start_consume()
        time.sleep(0.1)
        while True:
            try:
                m, h, d = ch.recv(timeout=0)
                self.assertTrue(m.startswith('5,'))
                ch.ack(d)
            except PQueue.Empty:
                ch.stop_consume()
                break

        # wait for new message
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 2)

        # consume and requeue
        ch.start_consume()
        time.sleep(0.1)
        m, h, d = ch.recv(timeout=0)
        self.assertTrue(m.startswith('5,'))
        ch.reject(d, requeue=True)

        # rabbit appears to deliver this later on, only when we've got another message in it
        # wait for another message publish
        num = self.five_events.get(timeout=2)
        self.assertEquals(num, 3)
        time.sleep(0.1)

        expect = ["5,2", "5,3"]
        while True:
            try:
                m, h, d = ch.recv(timeout=0)
                self.assertTrue(m.startswith('5,'))
                self.assertEquals(m, expect.pop(0))

                ch.ack(d)
            except PQueue.Empty:
                ch.stop_consume()
                self.assertListEqual(expect, [])
                break

        # let's change the binding to the 3 now, empty the testqueue first (artifact of test)
        while not self.three_events.empty():
            self.three_events.get(timeout=0)

        # we have to keep the exchange around - it will likely autodelete.
        ch2 = self.container.node.channel(RecvChannel)
        ch2.setup_listener(NameTrio(bootstrap.get_sys_name(), "another_queue"))

        ch._destroy_binding()
        ch._bind('routed.3')

        ch2._destroy_queue()
        ch2.close()

        self.three_events.get(timeout=1)
        ch.start_consume()
        time.sleep(0.1)
        self.assertEquals(ch._recv_queue.qsize(), 1)

        m, h, d = ch.recv(timeout=0)
        self.assertTrue(m.startswith('3,'))
        ch.ack(d)

        # wait for a new 3 to reject
        self.three_events.get(timeout=10)
        time.sleep(0.1)

        m, h, d = ch.recv(timeout=0)
        ch.reject(d, requeue=True)

        # recycle consumption, should get the requeued message right away?
        ch.stop_consume()
        ch.start_consume()
        time.sleep(0.1)

        self.assertEquals(ch._recv_queue.qsize(), 1)

        m2, h2, d2 = ch.recv(timeout=0)
        self.assertEquals(m, m2)

        ch.stop_consume()
Beispiel #40
0
class Engine(object):

    def __init__(self, sequence, database, phase_hook=None, batch_size=100,
                 force=False, retries=3):
        self.sequence = sequence
        self.database = database
        self.queue = Queue()
        self.phase_hook = phase_hook
        self.batch_size = batch_size
        self.force = force
        self.retries = retries
        self.errors = []

    def _push_to_target(self, targets):
        """Get a batch of elements from the queue, and push it to the targets.

        This function returns True if it proceeded all the elements in
        the queue, and there isn't anything more to read.
        """
        if self.queue.empty():
            return 0    # nothing

        batch = []
        pushed = 0

        # collecting a batch
        while len(batch) < self.batch_size:
            item = self.queue.get()
            if item == 'END':
                pushed += 1  # the 'END' item
                break
            batch.append(item)

        if len(batch) != 0:
            greenlets = Group()
            for plugin in targets:
                green = greenlets.spawn(self._put_data, plugin, batch)
                green.link_exception(partial(self._error,
                                             exception.InjectError, plugin))
            greenlets.join()
            pushed += len(batch)

        return pushed

    #
    # transaction management
    #
    def _start_transactions(self, plugins):
        for plugin in plugins:
            plugin.start_transaction()

    def _commit_transactions(self, plugins):
        # XXX what happens when this fails?
        for plugin in plugins:
            plugin.commit_transaction()

    def _rollback_transactions(self, plugins):
        for plugin in plugins:
            plugin.rollback_transaction()

    def _put_data(self, plugin, data):
        return plugin.inject(data)

    def _get_data(self, plugin, start_date, end_date):
        try:
            for item in plugin.extract(start_date, end_date):
                self.queue.put((plugin.get_id(), item))
        finally:
            self.queue.put('END')

    def _log_transaction(self, source, start_date, end_date, greenlet):
        self.database.add_entry([source], start_date, end_date)

    def _error(self, exception, plugin, greenlet):
        self.errors.append((exception, plugin, greenlet))

    def _run_phase(self, phase, start_date, end_date):
        phase, sources, targets = phase
        logger.info('Running phase %r' % phase)
        self._reset_counters()

        self._start_transactions(targets)
        self.database.start_transaction()
        try:
            greenlets = Group()
            # each callable will push its result in the queue
            for source in sources:
                exists = self.database.exists(source, start_date, end_date)
                if exists and not self.force:
                    logger.info('Already done: %s, %s to %s' % (
                        source.get_id(), start_date, end_date))
                    continue

                green = greenlets.spawn(self._get_data, source,
                                        start_date, end_date)
                green.link_value(partial(self._log_transaction, source,
                                         start_date, end_date))
                green.link_exception(partial(self._error,
                                             exception.ExtractError, source))

            # looking at the queue
            pushed = 0

            while len(greenlets) > 0 or self.queue.qsize() > 0:
                gevent.sleep(0)
                pushed += self._push_to_target(targets)
                # let's see if we have some errors
                if len(self.errors) > 0:
                    # yeah! we need to rollback
                    # XXX later we'll do a source-by-source rollback
                    raise exception.RunError(self.errors)

        except Exception:
            self._rollback_transactions(targets)
            self.database.rollback_transaction()
            raise
        else:
            self._commit_transactions(targets)
            self.database.commit_transaction()

    def _clear(self, start_date, end_date):
        source_ids = set()
        plugins = []
        for phase, sources, targets in self.sequence:
            source_ids.update(set([s.get_id() for s in sources]))
            plugins.extend(targets)

        for target in plugins:
            try:
                target.clear(start_date, end_date, list(source_ids))
            except Exception:
                logger.error('Failed to clear %r' % target.get_id())

    def _purge(self, start_date, end_date):
        for phase, sources, targets in self.sequence:
            for source in sources:
                try:
                    source.purge(start_date, end_date)
                except Exception:
                    logger.error('Failed to purge %r' % source.get_id())

    def _retry(self, func, *args, **kw):
        tries = 0
        retries = self.retries
        while tries < retries:
            try:
                return func(*args, **kw)
            except Exception, exc:
                self.queue.queue.clear()
                logger.exception('%s failed (%d/%d)' % (func, tries + 1,
                                                        retries))
                tries += 1
        raise
Beispiel #41
0
class ScraperGeventQueue(object):
    '''
    A gevent queue for the RT-Mart scraper.

    Parameters
    ----------
    scraperClass: class
        RT-Mart page scraper for CPP
    categories: list
        a category list for CPP
    areas: list
        an area information list
    gevent_num: int, default 80
        maximum number of concurrently running greenlets
    '''
    
    def __init__(self,scraperClass,categories,areas,gevent_num=80):
        self.scraperClass = scraperClass
        self.args = self._args(categories,areas)
        
        self.gevent_num = gevent_num
        self.tasks = Queue()    # create a gevent queue
        
        self.failure_list = []
        
    def _args(self,categories,areas):
        '''
        Parse input args.

        Parameters
        ----------
        categories: list or tuple
            a group of input categories
        areas: list or tuple
            a group of input area information

        Returns
        -------
        new_args: list
            a group of (category, area, page) argument tuples
        '''
        
        new_args = []
        for category in categories:
            for area in areas:
                new_args.append((category,area,1))
        return new_args
    
    def _run(self,task):
        '''
        Define the run function.
         Parameters
         ----------
          task: list or tuple
           a group of parameters for self.scraperClass 
        '''
        
        # split parameters for the page scraper class
        category_name,area_info,page_num=task
        
        # run the page scraper class
        scraper = self.scraperClass(category_name,area_info,page_num)
        scraper.json = scraper.getJSON()
        
        # if connect error, add the parameters into queue once again
        if (not scraper.json) and (task not in self.failure_list):
            self.failure_list.append(task)
            self.tasks.put_nowait(task)
            
        data_list = scraper.parseJSON(scraper.json)
        indicator = scraper.writeMongoDB(data_list)
        #indicator = scraper.writeCSV(data_list)
        
        # produce new parameters and add them to the gevent queue
        if (page_num == 1) and indicator:
            total_page = scraper.getTotalPageNumber()
            if total_page>1:
                [self.tasks.put_nowait((category_name,area_info,i)) for i in range(2,total_page+1)]
        
    def worker(self):
        'A gevent worker.'
        
        while not self.tasks.empty():
            task = self.tasks.get()
            self._run(task)

    def manager(self):
        'A gevent manager that seeds the queue with the initial task arguments.'
        
        for arg in self.args:
            self.tasks.put_nowait(arg)
            
    def start(self):
        'Run the gevent queue.'
        
        gevent.spawn(self.manager).join()
        tasks = [gevent.spawn(self.worker) for i in range(self.gevent_num)]
        gevent.joinall(tasks)
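
A hypothetical driver for the scraper queue above. DummyScraper mimics the interface the class expects (getJSON, parseJSON, writeMongoDB, getTotalPageNumber) without touching the network or MongoDB. Note that each worker exits as soon as the queue is momentarily empty, so follow-up pages enqueued while scraping page 1 are only picked up by workers that are still running.

class DummyScraper(object):
    def __init__(self, category_name, area_info, page_num):
        self.page_num = page_num
        self.json = None

    def getJSON(self):
        return {'page': self.page_num}   # pretend the request succeeded

    def parseJSON(self, data):
        return [data]

    def writeMongoDB(self, data_list):
        print('stored', data_list)       # pretend the write succeeded
        return True

    def getTotalPageNumber(self):
        return 3                         # pages 2..3 get queued after page 1

queue = ScraperGeventQueue(DummyScraper,
                           categories=['snacks'],
                           areas=[{'city': 'Taipei'}],
                           gevent_num=4)
queue.start()
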
        
            
Beispiel #42
0
class TestResourceAgreementWorker(unittest.TestCase):
    worker_config = {
        'worker_config': {
            'worker_type': 'basic_couchdb',
            'client_inc_step_timeout': 0.1,
            'client_dec_step_timeout': 0.02,
            'drop_threshold_client_cookies': 1.5,
            'worker_sleep': 5,
            'retry_default_timeout': 0.5,
            'retries_count': 2,
            'queue_timeout': 3,
            'bulk_save_limit': 100,
            'bulk_save_interval': 3
        },
        'storage_config': {
            # required for databridge
            "storage_type":
            "couchdb",  # possible values ['couchdb', 'elasticsearch']
            # arguments for storage configuration
            "host": "localhost",
            "port": 5984,
            "user": "",
            "password": "",
            "db_name": "basic_bridge_db",
            "bulk_query_interval": 3,
            "bulk_query_limit": 100,
        },
        'filter_type': 'basic_couchdb',
        'retrievers_params': {
            'down_requests_sleep': 5,
            'up_requests_sleep': 1,
            'up_wait_sleep': 30,
            'queue_size': 1001
        },
        'extra_params': {
            "mode": "_all_",
            "limit": 1000
        },
        'bridge_mode': 'basic',
        'resources_api_server': 'http://*****:*****',
    }

    @patch('openprocurement.bridge.basic.workers.logger')
    def test_add_to_retry_queue(self, mocked_logger):
        retry_items_queue = PriorityQueue()
        worker = AgreementWorker(config_dict=self.worker_config,
                                 retry_resource_items_queue=retry_items_queue)
        resource_item = {'id': uuid.uuid4().hex}
        priority = 1000
        self.assertEqual(retry_items_queue.qsize(), 0)

        # Add to retry_resource_items_queue
        worker.add_to_retry_queue(resource_item, priority=priority)

        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        resource_item = {'id': 0}
        # Add to retry_resource_items_queue with status_code '429'
        worker.add_to_retry_queue(resource_item, priority, status_code=429)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        priority = 1002
        worker.add_to_retry_queue(resource_item, priority=priority)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1003, resource_item))

        worker.add_to_retry_queue(resource_item, priority=priority)
        self.assertEqual(retry_items_queue.qsize(), 0)
        mocked_logger.critical.assert_called_once_with(
            'Tender {} reached limit retries count {} and droped from '
            'retry_queue.'.format(resource_item['id'],
                                  worker.config['retries_count']),
            extra={
                'MESSAGE_ID': 'dropped_documents',
                'JOURNAL_TENDER_ID': resource_item['id']
            })
        del worker

    def test__get_api_client_dict(self):
        api_clients_queue = Queue()
        client = MagicMock()
        client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client_dict2 = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        api_clients_queue.put(client_dict)
        api_clients_queue.put(client_dict2)
        api_clients_info = {
            client_dict['id']: {
                'drop_cookies': False,
                'not_actual_count': 5,
                'request_interval': 3
            },
            client_dict2['id']: {
                'drop_cookies': True,
                'not_actual_count': 3,
                'request_interval': 2
            }
        }

        # Success test
        worker = AgreementWorker(api_clients_queue=api_clients_queue,
                                 config_dict=self.worker_config,
                                 api_clients_info=api_clients_info)
        self.assertEqual(worker.api_clients_queue.qsize(), 2)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, client_dict)

        # Get lazy client
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['not_actual_count'], 0)
        self.assertEqual(api_client['request_interval'], 0)

        # Empty queue test
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client, None)

        # Exception when try renew cookies
        client.renew_cookies.side_effect = Exception('Can\'t renew cookies')
        worker.api_clients_queue.put(client_dict2)
        api_clients_info[client_dict2['id']]['drop_cookies'] = True
        api_client = worker._get_api_client_dict()
        self.assertIs(api_client, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.get(), client_dict2)

        # Get api_client with raise Empty exception
        api_clients_queue = MagicMock()
        api_clients_queue.empty.return_value = False
        api_clients_queue.get = MagicMock(side_effect=Empty)
        worker.api_clients_queue = api_clients_queue
        api_client = worker._get_api_client_dict()

        self.assertEqual(api_client, None)
        del worker

    def test__get_resource_item_from_queue(self):
        items_queue = PriorityQueue()
        item = (1, {'id': uuid.uuid4().hex})
        items_queue.put(item)

        # Success test
        worker = AgreementWorker(resource_items_queue=items_queue,
                                 config_dict=self.worker_config)
        self.assertEqual(worker.resource_items_queue.qsize(), 1)
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual((priority, resource_item), item)
        self.assertEqual(worker.resource_items_queue.qsize(), 0)

        # Empty queue test
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual(resource_item, None)
        self.assertEqual(priority, None)
        del worker

    @patch('openprocurement_client.client.TendersClient')
    def test__get_resource_item_from_public(self, mock_api_client):
        resource_item = {'id': uuid.uuid4().hex}
        resource_item_id = uuid.uuid4().hex
        priority = 1

        api_clients_queue = Queue()
        client_dict = {
            'id': uuid.uuid4().hex,
            'request_interval': 0.02,
            'client': mock_api_client
        }
        api_clients_queue.put(client_dict)
        api_clients_info = {
            client_dict['id']: {
                'drop_cookies': False,
                'request_durations': {}
            }
        }
        retry_queue = PriorityQueue()
        return_dict = {
            'data': {
                'id': resource_item_id,
                'dateModified': datetime.datetime.utcnow().isoformat()
            }
        }
        mock_api_client.get_resource_item.return_value = return_dict
        worker = AgreementWorker(api_clients_queue=api_clients_queue,
                                 config_dict=self.worker_config,
                                 retry_resource_items_queue=retry_queue,
                                 api_clients_info=api_clients_info)

        # Success test
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(api_client['request_interval'], 0.02)
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 0)
        self.assertEqual(public_item, return_dict['data'])

        # InvalidResponse
        mock_api_client.get_resource_item.side_effect = InvalidResponse(
            'invalid response')
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        sleep(worker.config['retry_default_timeout'] * 1)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 1)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)

        # RequestFailed status_code=429
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 429}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'], 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 2)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        self.assertEqual(api_client['request_interval'],
                         worker.config['client_inc_step_timeout'])

        # RequestFailed status_code=429 with drop cookies
        api_client['request_interval'] = 2
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        sleep(api_client['request_interval'])
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 3)

        # RequestFailed with status_code not equal 429
        mock_api_client.get_resource_item.side_effect = RequestFailed(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 4)

        # ResourceNotFound
        mock_api_client.get_resource_item.side_effect = RNF(
            munchify({'status_code': 404}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # ResourceGone
        mock_api_client.get_resource_item.side_effect = ResourceGone(
            munchify({'status_code': 410}))
        api_client = worker._get_api_client_dict()
        self.assertEqual(worker.api_clients_queue.qsize(), 0)
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(worker.api_clients_queue.qsize(), 1)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 5)

        # Exception
        api_client = worker._get_api_client_dict()
        mock_api_client.get_resource_item.side_effect = Exception(
            'text except')
        public_item = worker._get_resource_item_from_public(
            api_client, priority, resource_item)
        self.assertEqual(public_item, None)
        self.assertEqual(api_client['request_interval'], 0)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(worker.retry_resource_items_queue.qsize(), 6)

        del worker

    def test_shutdown(self):
        worker = AgreementWorker(
            'api_clients_queue', 'resource_items_queue', 'db', {
                'worker_config': {
                    'bulk_save_limit': 1,
                    'bulk_save_interval': 1
                },
                'resource': 'tenders'
            }, 'retry_resource_items_queue')
        self.assertEqual(worker.exit, False)
        worker.shutdown()
        self.assertEqual(worker.exit, True)

    def up_worker(self):
        worker_thread = AgreementWorker.spawn(
            resource_items_queue=self.queue,
            retry_resource_items_queue=self.retry_queue,
            api_clients_info=self.api_clients_info,
            api_clients_queue=self.api_clients_queue,
            config_dict=self.worker_config,
            db=self.db)
        idle()
        worker_thread.shutdown()
        sleep(3)

    @patch('openprocurement.bridge.basic.workers.handlers_registry')
    @patch(
        'openprocurement.bridge.basic.workers.AgreementWorker._get_resource_item_from_public'
    )
    @patch('openprocurement.bridge.basic.workers.logger')
    def test__run(self, mocked_logger, mock_get_from_public, mock_registry):
        self.queue = Queue()
        self.retry_queue = Queue()
        self.api_clients_queue = Queue()
        queue_item = (1, {
            'id': uuid.uuid4().hex,
            'procurementMethodType': 'closeFrameworkAgreementUA'
        })
        doc = {
            'id': queue_item[1],
            '_rev': '1-{}'.format(uuid.uuid4().hex),
            'dateModified': datetime.datetime.utcnow().isoformat(),
            'doc_type': 'Tender'
        }
        client = MagicMock()
        api_client_dict = {
            'id': uuid.uuid4().hex,
            'client': client,
            'request_interval': 0
        }
        client.session.headers = {'User-Agent': 'Test-Agent'}
        self.api_clients_info = {
            api_client_dict['id']: {
                'drop_cookies': False,
                'request_durations': []
            }
        }
        self.db = MagicMock()
        worker = AgreementWorker(api_clients_queue=self.api_clients_queue,
                                 resource_items_queue=self.queue,
                                 retry_resource_items_queue=self.retry_queue,
                                 db=self.db,
                                 api_clients_info=self.api_clients_info,
                                 config_dict=self.worker_config)
        worker.exit = MagicMock()
        worker.exit.__nonzero__.side_effect = [False, True]

        # Try get api client from clients queue
        self.assertEqual(self.queue.qsize(), 0)
        worker._run()
        self.assertEqual(self.queue.qsize(), 0)
        mocked_logger.critical.assert_called_once_with(
            'API clients queue is empty.')

        # Try get item from resource items queue with no handler
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        mock_registry.get.return_value = ''
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker._run()
        self.assertEqual(mocked_logger.critical.call_args_list, [
            call('API clients queue is empty.'),
            call('Not found handler for procurementMethodType: {}, {} {}'.
                 format(doc['id']['procurementMethodType'],
                        self.worker_config['resource'][:-1], doc['id']['id']),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])

        # Try get item from resource items queue
        self.api_clients_queue.put(api_client_dict)
        worker.exit.__nonzero__.side_effect = [False, True]
        handler_mock = MagicMock()
        handler_mock.process_resource.return_value = None
        mock_registry.return_value = {
            'closeFrameworkAgreementUA': handler_mock
        }
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[2:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('PUT API CLIENT: {}'.format(api_client_dict['id']),
                 extra={'MESSAGE_ID': 'put_client'}),
            call('Resource items queue is empty.')
        ])

        # Try get resource item from local storage
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[5:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id']['id']))
        ])

        # Try get local_resource_item with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        self.db.get.side_effect = [Exception('Database Error')]
        worker.exit.__nonzero__.side_effect = [False, True]
        worker._run()
        self.assertEqual(mocked_logger.debug.call_args_list[7:], [
            call('GET API CLIENT: {} {} with requests interval: {}'.format(
                api_client_dict['id'],
                api_client_dict['client'].session.headers['User-Agent'],
                api_client_dict['request_interval']),
                 extra={
                     'REQUESTS_TIMEOUT': 0,
                     'MESSAGE_ID': 'get_client'
                 }),
            call('Get tender {} from main queue.'.format(doc['id']['id']))
        ])

        # Try process resource with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]

        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (RequestFailed(), )
        mock_registry.get.return_value = mock_handler

        worker._run()
        self.assertEqual(mocked_logger.error.call_args_list, [
            call('Error while processing {} {}: {}'.format(
                self.worker_config['resource'][:-1], doc['id']['id'],
                'Not described error yet.'),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])
        check_queue_item = (queue_item[0] + 1, queue_item[1]
                            )  # priority is increased
        self.assertEquals(self.retry_queue.get(), check_queue_item)

        # Try process resource with Exception
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = doc
        worker.exit.__nonzero__.side_effect = [False, True]

        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (Exception(), )
        mock_registry.get.return_value = mock_handler
        worker._run()

        self.assertEqual(mocked_logger.error.call_args_list[1:], [
            call('Error while processing {} {}: {}'.format(
                self.worker_config['resource'][:-1], doc['id']['id'], ''),
                 extra={
                     'JOURNAL_TENDER_ID': doc['id']['id'],
                     'MESSAGE_ID': 'bridge_worker_exception'
                 })
        ])
        check_queue_item = (queue_item[0] + 1, queue_item[1]
                            )  # priority is increased
        self.assertEquals(self.retry_queue.get(), check_queue_item)

        #  No resource item
        self.api_clients_queue.put(api_client_dict)
        self.queue.put(queue_item)
        mock_get_from_public.return_value = None
        worker.exit.__nonzero__.side_effect = [False, True]

        mock_handler = MagicMock()
        mock_handler.process_resource.side_effect = (Exception(), )
        mock_registry.get.return_value = mock_handler
        worker._run()

        self.assertEquals(self.queue.empty(), True)
        self.assertEquals(self.retry_queue.empty(), True)

    @patch('openprocurement.bridge.basic.workers.datetime')
    @patch('openprocurement.bridge.basic.workers.logger')
    def test_log_timeshift(self, mocked_logger, mocked_datetime):
        worker = AgreementWorker(
            'api_clients_queue', 'resource_items_queue', 'db', {
                'worker_config': {
                    'bulk_save_limit': 1,
                    'bulk_save_interval': 1
                },
                'resource': 'tenders'
            }, 'retry_resource_items_queue')

        time_var = datetime.datetime.now(iso8601.UTC)

        mocked_datetime.now.return_value = time_var
        resource_item = {'id': '0' * 32, 'dateModified': time_var.isoformat()}
        worker.log_timeshift(resource_item)

        self.assertEqual(mocked_logger.debug.call_args_list, [
            call('{} {} timeshift is {} sec.'.format(
                self.worker_config['resource'][:-1], resource_item['id'], 0.0),
                 extra={'DOCUMENT_TIMESHIFT': 0.0})
        ])
Beispiel #43
0
class GeventedConnPool(object):
    closed = False
    maxsize = None
    pool = None
    _connectargs = None

    def __init__(self, maxsize=8, **connectargs):
        self.maxsize = maxsize
        self.pool = Queue()
        self.lock = gevent.lock.BoundedSemaphore(maxsize)
        self._connectargs = connectargs

    def _connect(self):
        return psycopg2.connect(**self._connectargs)

    def get(self):
        if self.closed:
            raise psycopg2.pool.PoolError("connection pool is closed")
        self.lock.acquire()
        try:
            conn = self.pool.get_nowait()
            if conn.closed or conn.status != psycopg2.extensions.STATUS_READY:
                self.lock.release()
                logger.info("Conn isn't ready: %r", conn.status)
                conn.close()
                return self.get()
            return conn
        except gevent.queue.Empty:
            try:
                return self._connect()
            except:
                self.lock.release()
                raise

    def put(self, conn):
        assert conn is not None
        try:
            if self.closed:
                conn.close()
            if conn.closed:
                # If the connection is closed, we just discard it.
                self.lock.release()
                return

            # Return the connection into a consistent state before putting
            # it back into the pool
            status = conn.get_transaction_status()
            if status == psycopg2.extensions.TRANSACTION_STATUS_UNKNOWN:
                # server connection lost
                conn.close()
                self.lock.release()
                return
            elif status != psycopg2.extensions.TRANSACTION_STATUS_IDLE:
                # connection in error or in transaction
                conn.rollback()
        except StandardError:
            logger.exception("Failed in put")
            self.lock.release()
            gevent.get_hub().handle_error(conn, *sys.exc_info())
        else:
            gevent.spawn(self._reset_and_return, conn)

    def _reset_and_return(self, conn):
        try:
            if self.closed:
                conn.close()
            if not conn.closed:
                conn.reset()
                self.pool.put(conn)
        except:
            logger.exception("Failed in reset")
            gevent.get_hub().handle_error(conn, *sys.exc_info())
        finally:
            self.lock.release()

    def closeall(self, timeout=5):
        logger.info("Closing all connections: %d", self.pool.qsize())
        self.closed = True
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

        if self.lock.counter != self.maxsize:
            gevent.wait(timeout=timeout)
        assert self.lock.counter == self.maxsize
        self.closed = False

    @contextlib.contextmanager
    def connection(self,
                   isolation_level=None,
                   autocommit=None,
                   readonly=False):
        conn = self.get()
        try:
            if isolation_level is not None and isolation_level != conn.isolation_level:
                conn.set_isolation_level(isolation_level)
            if autocommit is not None:
                conn.autocommit = autocommit
            if readonly is not None:
                conn.set_session(readonly=readonly)
            yield conn
            conn.commit()
        finally:
            if conn:
                #self.put(conn)
                gevent.spawn(self.put, conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        connargs = {
            'isolation_level': kwargs.pop('isolation_level', None),
            'autocommit': kwargs.pop('autocommit', None),
            'readonly': kwargs.pop('readonly', None)
        }

        if kwargs.pop('named', False) is True:
            kwargs['name'] = str(uuid.uuid4())
        with self.connection(**connargs) as conn:
            yield conn.cursor(*args, **kwargs)

    def mogrify(self, *args, **kwargs):
        with self.cursor(**kwargs) as cur:
            return cur.mogrify(*args)

    # Some shortcut functions
    def execute(self, *args, **kwargs):
        """like cursor.execute

        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def executemany(self, *args, **kwargs):
        """Pasthrough to cursor.executemany

        kwargs to cursor, positional args to executemany"""
        with self.cursor(**kwargs) as cursor:
            cursor.executemany(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """like cursor.fetchone

        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """like cursor.fetchall

        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """iterate over a cursors results

        kwargs to cursor, positional args to execute
        """
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            for f in cursor:
                yield f
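# --- Hypothetical usage sketch (not part of the original example). The pool
# class above is shown without its constructor in this excerpt, so `pool` is
# passed in as an already-built instance; table and column names are made up.
def _pool_usage_sketch(pool):
    # The shortcut helpers check out a connection, run the statement,
    # commit, and hand the connection back to the pool:
    rows = pool.fetchall('SELECT id, name FROM users WHERE active = %s', (True,))

    # Or manage the cursor directly, e.g. as a named (server-side) cursor:
    with pool.cursor(named=True) as cur:
        cur.execute('SELECT id FROM big_table')
        for (row_id,) in cur:
            print(row_id)
    return rows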
class BitcoinDepositService(object):
    def __init__(self,
                 _config=None,
                 _persistent=None,
                 _watchlist=None,
                 _balance_service=None):
        self.tasks = Queue()

        if _config:
            self.config = _config
        else:
            _config = RawConfigParser()
            _config.read('config.cfg')
            self.config = _config

        # Persistent is for saving and loading the progress;
        # the data can be saved in a local file or in the database.
        self.persistent = \
            _persistent or FilePersistent(_start=self.config.getint('deposit', 'start_block'))

        # TODO: extract transaction fetcher
        self.base_url = self.config.get('deposit', 'base_url')
        self.session = requests.Session()

        self.watchlist = _watchlist
        self.balance_service = _balance_service

    def get_block(self, block_height='latest'):
        """
        Get the detail info of a block

        :param block_height: either an integer or 'latest'
        :return: the block dict returned by BTC.com
        """
        url = '%s/block/%s' % (self.base_url, block_height)
        rv = self.session.get(url).json()

        if rv['err_msg']:
            raise Exception(rv['err_msg'])
        else:
            return rv['data']

    def generate_block_transaction_urls(self, block_height):
        # Get the total count of this block
        url = '%s/block/%s/tx' % (self.base_url, block_height)
        rv = self.session.get(url)
        data = rv.json()['data']

        page_size = data['pagesize']
        total_count = data['total_count']

        # Fetch each page; round up so a partial last page is not skipped
        total_pages = -(-total_count // page_size)
        for i in range(1, total_pages + 1):
            paginated_url = url + '?page=' + str(i)
            self.tasks.put_nowait(paginated_url)
            gevent.sleep(.5)

    def process_transaction(self, transaction):

        outputs = transaction['outputs']

        for output in outputs:
            if output['spent_by_tx']:
                logger.info('%s|spent', transaction['block_height'])
            else:
                logger.info('%s|a:%s|v:%d', transaction['block_height'],
                            output['addresses'], output['value'])

                if len(output['addresses']) > 1:
                    logger.error('more than one output address')
                    continue

                if len(output['addresses']) == 0:
                    logger.error('no address found')
                    continue

                address = output['addresses'][0]
                value = output['value']
                tx_id = transaction['hash']

                if self.watchlist.exists(address):
                    try:
                        self.deposit(address, value, tx_id)
                        logger.info('deposit %s to %s: OK', value, address)
                    except Exception:
                        logger.exception('deposit %s to %s: failed', value,
                                         address)

    def worker(self):
        while not self.tasks.empty():
            url = self.tasks.get()
            rv = self.session.get(url)

            if rv.status_code != 200:
                # Hit the rate limit; retry.
                # Note that there is no maximum number of retries.
                self.tasks.put_nowait(url)

                # Wait for the next URL
                continue

            # All is well, process the transactions
            data = rv.json()['data']

            transactions = data['list']
            for transaction in transactions:
                self.process_transaction(transaction)

            gevent.sleep(.5)

    def run(self):

        # Pick up the progress
        block_height = self.persistent.get_last_processed_block() + 1

        min_confirmation_count = self.config.getint('deposit',
                                                    'min_confirmation_count')

        # Main event loop
        while True:
            try:
                block = self.get_block(block_height)
                logger.info('New block: %d', block_height)

                if block['confirmations'] < min_confirmation_count:
                    raise WorkerConfirmException(
                        'Confirmation is less than required minimum: %d' %
                        min_confirmation_count)

                sleep(.5)

                gevent.spawn(self.generate_block_transaction_urls,
                             block_height).join()
                gevent.spawn(self.worker).join()

                # Save the checkpoint
                self.persistent.set_last_processed_block(block_height)

                # increase block height
                block_height += 1
            except WorkerConfirmException as e:
                pprint(e)
                sleep(self.config.getfloat('deposit', 'block_times'))

    def deposit(self, address, value, tx_id):
        self.balance_service.deposit(address, value, tx_id)
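# --- Hypothetical usage sketch (not part of the original example). It assumes
# the config.cfg file with the [deposit] section read in __init__ exists; the
# watchlist and balance_service collaborators are placeholders that must expose
# exists(address) and deposit(address, value, tx_id) respectively.
def _run_deposit_service(watchlist, balance_service):
    service = BitcoinDepositService(_watchlist=watchlist,
                                    _balance_service=balance_service)
    # run() loops forever: fetch the next block, fan the paginated transaction
    # URLs out onto self.tasks, drain them in worker(), then checkpoint the
    # processed block height via the persistent store.
    service.run()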
Beispiel #45
0
class LoadBalancer:
    def __init__(self, ctx):
        """Initialize a LoadBalancer object, which manages workflow execution.

        Args:
            ctx (Context object): A Context object, shared with the Receiver thread.
        """
        self.available_workers = []
        self.workflow_comms = {}
        self.thread_exit = False
        self.pending_workflows = Queue()

        self.ctx = ctx
        server_secret_file = os.path.join(
            core.config.paths.zmq_private_keys_path, "server.key_secret")
        server_public, server_secret = auth.load_certificate(
            server_secret_file)

        self.request_socket = self.ctx.socket(zmq.ROUTER)
        self.request_socket.curve_secretkey = server_secret
        self.request_socket.curve_publickey = server_public
        self.request_socket.curve_server = True
        self.request_socket.bind(REQUESTS_ADDR)

        self.comm_socket = self.ctx.socket(zmq.ROUTER)
        self.comm_socket.curve_secretkey = server_secret
        self.comm_socket.curve_publickey = server_public
        self.comm_socket.curve_server = True
        self.comm_socket.bind(COMM_ADDR)

        gevent.sleep(2)

    def manage_workflows(self):
        """Manages the workflows to be executed and the workers. It waits for the server to submit a request to
        execute a workflow, and then passes the workflow off to an available worker, once one becomes available.
        """
        while True:
            if self.thread_exit:
                break
            # There is a worker available and a workflow in the queue, so pop it off and send it to the worker
            if self.available_workers and not self.pending_workflows.empty():
                workflow = self.pending_workflows.get()
                worker = self.available_workers.pop()
                self.workflow_comms[workflow['execution_uid']] = worker
                self.request_socket.send_multipart(
                    [worker, b"", asbytes(json.dumps(workflow))])
            # Otherwise poll (without blocking) for workers reporting "Ready" or "Done",
            # so the loop stays responsive if a workflow is queued in the meantime
            else:
                try:
                    worker, empty, ready = self.request_socket.recv_multipart(
                        flags=zmq.NOBLOCK)
                    if ready == b"Ready" or ready == b"Done":
                        self.available_workers.append(worker)
                except zmq.ZMQError:
                    gevent.sleep(0.1)
                    continue
        self.request_socket.close()
        self.comm_socket.close()
        return

    def add_workflow(self, workflow_json):
        """Adds a workflow to the queue to be executed.

        Args:
            workflow_json (dict): Dict representation of a workflow, along with some additional fields necessary for
                reconstructing the workflow.
        """
        self.pending_workflows.put(workflow_json)

    def pause_workflow(self, workflow_execution_uid):
        """Pauses a workflow currently executing.

        Args:
            workflow_execution_uid (str): The execution UID of the workflow.
        """
        logger.info('Pausing workflow {0}'.format(workflow_execution_uid))
        if workflow_execution_uid in self.workflow_comms:
            self.comm_socket.send_multipart(
                [self.workflow_comms[workflow_execution_uid], b'', b'Pause'])

    def resume_workflow(self, workflow_execution_uid):
        """Resumes a workflow that has previously been paused.

        Args:
            workflow_execution_uid (str): The execution UID of the workflow.
        """
        logger.info('Resuming workflow {0}'.format(workflow_execution_uid))
        if workflow_execution_uid in self.workflow_comms:
            self.comm_socket.send_multipart(
                [self.workflow_comms[workflow_execution_uid], b'', b'Resume'])

    def send_data_to_trigger(self, data_in, workflow_uids, inputs=None):
        """Sends the data_in to the workflows specified in workflow_uids.

        Args:
            data_in (dict): Data to be used to match against the triggers for a Step awaiting data.
            workflow_uids (list[str]): A list of workflow execution UIDs to send this data to.
            inputs (dict, optional): An optional dict of inputs to update for a Step awaiting data for a trigger.
                Defaults to None.
        """
        data = dict()
        data['data_in'] = data_in
        data['inputs'] = inputs if inputs is not None else {}
        for uid in workflow_uids:
            if uid in self.workflow_comms:
                self.comm_socket.send_multipart([
                    self.workflow_comms[uid], b'',
                    str.encode(json.dumps(data))
                ])
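# --- Hypothetical usage sketch (not part of the original example). It assumes
# the zmq Context, certificate files and config paths used in __init__ are
# already set up; the workflow dict fields below are placeholders.
def _run_load_balancer(ctx):
    balancer = LoadBalancer(ctx)
    loop = gevent.spawn(balancer.manage_workflows)

    # Workflows queued here are handed to the next worker that reports
    # b"Ready" or b"Done" on the request socket.
    balancer.add_workflow({'execution_uid': 'abc123', 'name': 'example workflow'})

    # Setting thread_exit makes manage_workflows close its sockets and return.
    balancer.thread_exit = True
    loop.join()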
Beispiel #46
0
class Peer(gevent.Greenlet):

    def __init__(self, peermanager, node_info):
        gevent.Greenlet.__init__(self)
        print ("Constructing basic configs")
        self.peermanager = peermanager
        self.peerID = node_info['ID']
        self.peerAddr = node_info['addr']
        self.ping_interval = self.peermanager.configs['p2p']['pingtime']        # ping interval
        self.timeout = self.peermanager.configs['p2p']['timeout']
        try:
            self.socket = gevent.socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.socket.bind(("", 0))
            self.socket.settimeout(self.peermanager.configs['p2p']['timeout'])      # timeout
        except gevent.socket.error as e:
            print('Socket creation error: %s' % e.strerror)
        
        self.myID = peermanager.configs['node']['ID']
        self.myAddr = (self.peermanager.configs['p2p']['listen_host'], self.socket.getsockname()[1])
        self.handler = Protocol(self,self.peermanager.configs['node']['wif'],self.peermanager.configs['node']['pubkey'],self.peermanager.configs['p2p']['num_workers'])
        self.handler.start()
        self.greenlets = dict()
        self.outbox = Queue()
        self.vipbox = Queue()
        self.inbox = Queue()
        self.is_stopped = False
        self.is_pinged = False
        self.last_contact = time.time()
        self.read_ready = gevent.event.Event()
        self.read_ready.set()

    def stop(self):
        if not self.is_stopped:
            print("Trying to stop peer.")
            self.is_stopped = True
            try:
                self.handler.stop()
                for process in self.greenlets.values():
                    try:
                        process.kill()
                    except gevent.GreenletExit:
                        pass
                self.greenlets = None              
            except:
                print('Failed to kill all processes.')
            finally:
                self.peermanager.peers.remove(self)
                self.kill()
    
    def run(self):
        print('Running main loop of peer ', self.peerID)
        self.handler.run()
        self.greenlets['sender'] = gevent.spawn(self.send_loop)
        self.greenlets['receiver'] = gevent.spawn(self.recv_loop)

        while not self.is_stopped:
            self.read_ready.wait()
            try:
                gevent.socket.wait_read(self.socket.fileno())
            except gevent.socket.error as e:
                print('Network error: %s' %e.strerror)
                if e.errno in (errno.EBADF,):
                    self.report(("disconnect", dict(type="end_session",reason=e.__str__())))
                    self.stop()
                else:
                    raise e 
            
            try:
                message, addr = self.socket.recvfrom(8192)
                self.peerAddr = addr
            except gevent.socket.error as e:
                print('Network error: %s' %e.strerror)
                if e.errno in (errno.ENETDOWN, errno.ECONNRESET, errno.ETIMEDOUT,errno.EHOSTUNREACH, errno.ECONNABORTED):
                    self.report(("disconnect", dict(type="end_session",reason=e.__str__())))
                    self.stop()
                else:
                    raise e
            
            if message:
                self.last_contact = time.time()
                self.is_pinged = False
                self.inbox.put(message)

    def send_loop(self):
        while not self.is_stopped:
            elapsed = time.time() - self.last_contact
            if elapsed > self.timeout:
                self.send_disconnect('Ping pong timeout')
            elif elapsed > self.ping_interval and not self.is_pinged:
                print("time elapsed:", elapsed)
                self.is_pinged = True
                self.send_ping()
            else:
                if not self.vipbox.empty():
                    self.send(self.vipbox.get())
                if not self.outbox.empty():
                    self.send(self.outbox.get())
            gevent.sleep(0)
        
    def recv_loop(self):
        while not self.is_stopped:
            self.report(self.handler.rQ.get())

    def send(self, packet):
        if not packet:
            print("Missing packet!")
            return
        self.read_ready.clear()

        try:
            self.socket.sendto(packet, self.peerAddr)
        except gevent.socket.timeout as e:
            # socket.timeout is a subclass of socket.error, so catch it first
            print("Timeout in send! ", e)
            self.report(("disconnect", dict(type="end_session", reason='send timeout')))
            self.stop()
        except gevent.socket.error as e:
            print("Error in send! ", e)
            self.report(("disconnect", dict(type="end_session", reason='send error')))
            self.stop()
        self.read_ready.set()

    def send_hello(self):
        self.handler.sQ.put(dict(action=0, payload="Hello, requesting connection."))

    def send_confirm(self):
        self.handler.sQ.put(dict(action=1, payload="Successfully received hello."))

    def send_disconnect(self, msg):
        self.handler.sQ.put(dict(action=2, payload=msg))
        self.report(("disconnect", dict(type="end_session",reason=msg)))
        self.stop()

    def send_packet(self, packet):
        packet = json.dumps(packet)
        self.handler.sQ.put(dict(action=5,payload=packet))

    def send_ping(self):
        self.handler.sQ.put(dict(action=3))
    
    def report(self, rp):
        if rp[0] == "checked":
            self.peermanager.log(self.peerID, 1)
        if rp[0] == "disconnect":
            self.peermanager.log(self.peerID, 0, reasons=rp[1])
            self.stop()
        if rp[0] == "data":
            self.peermanager.log(self.peerID, 1)
            rp[1]['data'] = json.loads(rp[1]['data'])
            self.parse_data(rp[1])
            #self.peermanager.recv_queue.put(rp[1])
    
    def parse_data(self, data):
        method = int(data['data']['method'])
        try:
            self.peermanager.recv_queue[method].put(data)
        except IndexError:
            print ("Illegal method index!")
Beispiel #47
0
class VncIfmapClient(object):

    # * Not all properties in an object need to be published
    #   to IfMap.
    # * In some properties, not all fields are relevant
    #   to be published to IfMap.
    # If the property is not relevant at all, map the property
    # to None. If it is partially relevant, map it to a handler
    # function that handcrafts the generated xml for the object.
    IFMAP_PUBLISH_SKIP_LIST = {
        # Format - <prop_field> : None | <Handler_fn>
        u"perms2": None,
        u"id_perms": build_idperms_ifmap_obj
    }

    def handler(self, signum, frame):
        file = open("/tmp/api-server-ifmap-cache.txt", "w")
        file.write(pformat(self._id_to_metas))
        file.close()

    def __init__(self, db_client_mgr, ifmap_srv_ip, ifmap_srv_port, uname,
                 passwd, ssl_options):
        self._ifmap_srv_ip = ifmap_srv_ip
        self._ifmap_srv_port = ifmap_srv_port
        self._username = uname
        self._password = passwd
        self._ssl_options = ssl_options
        self._dequeue_greenlet = None
        self._CONTRAIL_XSD = "http://www.contrailsystems.com/vnc_cfg.xsd"
        self._IPERMS_NAME = "id-perms"
        self._NAMESPACES = {
            'env': "http://www.w3.org/2003/05/soap-envelope",
            'ifmap': "http://www.trustedcomputinggroup.org/2010/IFMAP/2",
            'meta':
            "http://www.trustedcomputinggroup.org/2010/IFMAP-METADATA/2",
            'contrail': self._CONTRAIL_XSD
        }

        self._db_client_mgr = db_client_mgr
        self._sandesh = db_client_mgr._sandesh

        ConnectionState.update(
            conn_type=ConnectionType.IFMAP,
            name='IfMap',
            status=ConnectionStatus.INIT,
            message='',
            server_addrs=["%s:%s" % (ifmap_srv_ip, ifmap_srv_port)])
        self._conn_state = ConnectionStatus.INIT
        self._is_ifmap_up = False
        self._queue = Queue(self._get_api_server()._args.ifmap_queue_size)

        self.reset()

        # Set the signal handler
        signal.signal(signal.SIGUSR2, self.handler)

        self._init_conn()
        self._publish_config_root()
        self._health_checker_greenlet =\
               vnc_greenlets.VncGreenlet('VNC IfMap Health Checker',
                                         self._health_checker)

    # end __init__

    @classmethod
    def object_alloc(cls, obj_class, parent_res_type, fq_name):
        res_type = obj_class.resource_type
        my_fqn = ':'.join(fq_name)
        parent_fqn = ':'.join(fq_name[:-1])

        my_imid = 'contrail:%s:%s' % (res_type, my_fqn)
        if parent_fqn:
            if parent_res_type is None:
                err_msg = "Parent: %s type is none for: %s" % (parent_fqn,
                                                               my_fqn)
                return False, (409, err_msg)
            parent_imid = 'contrail:' + parent_res_type + ':' + parent_fqn
        else:  # parent is config-root
            parent_imid = 'contrail:config-root:root'

        # Normalize/escape special chars
        my_imid = escape(my_imid)
        parent_imid = escape(parent_imid)

        return True, (my_imid, parent_imid)

    # end object_alloc

    def object_set(self, obj_class, my_imid, existing_metas, obj_dict):
        update = {}

        # Properties Meta
        for prop_field in obj_class.prop_fields:
            field = obj_dict.get(prop_field)
            if field is None:
                continue
            # construct object of xsd-type and get its xml repr
            # e.g. virtual_network_properties
            prop_field_types = obj_class.prop_field_types[prop_field]
            is_simple = not prop_field_types['is_complex']
            prop_type = prop_field_types['xsd_type']
            # e.g. virtual-network-properties
            prop_meta = obj_class.prop_field_metas[prop_field]

            if prop_field in VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST:
                # Field not relevant, skip publishing to IfMap
                if not VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[prop_field]:
                    continue
                # Call the handler fn to generate the relevant fields.
                if callable(
                        VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[prop_field]):
                    prop_xml = VncIfmapClient.IFMAP_PUBLISH_SKIP_LIST[
                        prop_field](prop_field, field)
                    meta = Metadata(prop_meta,
                                    '', {'ifmap-cardinality': 'singleValue'},
                                    ns_prefix='contrail',
                                    elements=prop_xml)
                else:
                    log_str = ('%s is marked for partial publish to Ifmap '
                               'but handler not defined' % prop_field)
                    self.config_log(log_str, level=SandeshLevel.SYS_DEBUG)
                    continue
            elif is_simple:
                norm_str = escape(str(field))
                meta = Metadata(prop_meta,
                                norm_str, {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail')
            else:  # complex type
                prop_cls = str_to_class(prop_type, __name__)
                buf = cStringIO.StringIO()
                # perms might be inserted at server as obj.
                # obj construction diff from dict construction.
                if isinstance(field, dict):
                    prop_cls(**field).exportChildren(buf,
                                                     level=1,
                                                     name_=prop_meta,
                                                     pretty_print=False)
                elif isinstance(field, list):
                    for elem in field:
                        if isinstance(elem, dict):
                            prop_cls(**elem).exportChildren(buf,
                                                            level=1,
                                                            name_=prop_meta,
                                                            pretty_print=False)
                        else:
                            elem.exportChildren(buf,
                                                level=1,
                                                name_=prop_meta,
                                                pretty_print=False)
                else:  # object
                    field.exportChildren(buf,
                                         level=1,
                                         name_=prop_meta,
                                         pretty_print=False)
                prop_xml = buf.getvalue()
                buf.close()
                meta = Metadata(prop_meta,
                                '', {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail',
                                elements=prop_xml)

            # If obj is new (existing_metas is None), or
            # if obj does not have this prop_meta, or
            # if the prop_meta is different from what we have currently,
            # then update
            if (not existing_metas or not prop_meta in existing_metas
                    or ('' in existing_metas[prop_meta]
                        and str(meta) != str(existing_metas[prop_meta]['']))):
                self._update_id_self_meta(update, meta)
        # end for all property types

        # References Meta
        for ref_field in obj_class.ref_fields:
            refs = obj_dict.get(ref_field)
            if not refs:
                continue
            for ref in refs:
                ref_fq_name = ref['to']
                ref_fld_types_list = list(obj_class.ref_field_types[ref_field])
                ref_res_type = ref_fld_types_list[0]
                ref_link_type = ref_fld_types_list[1]
                ref_meta = obj_class.ref_field_metas[ref_field]
                ref_imid = get_ifmap_id_from_fq_name(ref_res_type, ref_fq_name)
                ref_data = ref.get('attr')
                if ref_data:
                    buf = cStringIO.StringIO()
                    attr_cls = str_to_class(ref_link_type, __name__)
                    attr_cls(**ref_data).exportChildren(buf,
                                                        level=1,
                                                        name_=ref_meta,
                                                        pretty_print=False)
                    ref_link_xml = buf.getvalue()
                    buf.close()
                else:
                    ref_link_xml = ''
                meta = Metadata(ref_meta,
                                '', {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail',
                                elements=ref_link_xml)
                self._update_id_pair_meta(update, ref_imid, meta)
        # end for all ref types

        self._publish_update(my_imid, update)
        return (True, '')

    # end object_set

    def object_create(self, obj_ids, obj_dict):
        obj_type = obj_ids['type']
        obj_class = self._db_client_mgr.get_resource_class(obj_type)
        if not 'parent_type' in obj_dict:
            # parent is config-root
            parent_type = 'config-root'
            parent_imid = 'contrail:config-root:root'
        else:
            parent_type = obj_dict['parent_type']
            parent_imid = obj_ids.get('parent_imid', None)

        # Parent Link Meta
        update = {}
        parent_cls = self._db_client_mgr.get_resource_class(parent_type)
        parent_link_meta = parent_cls.children_field_metas.get('%ss' %
                                                               (obj_type))
        if parent_link_meta:
            meta = Metadata(parent_link_meta,
                            '', {'ifmap-cardinality': 'singleValue'},
                            ns_prefix='contrail')
            self._update_id_pair_meta(update, obj_ids['imid'], meta)
            self._publish_update(parent_imid, update)

        (ok, result) = self.object_set(obj_class, obj_ids['imid'], None,
                                       obj_dict)
        return (ok, result)

    # end object_create

    def _object_read_to_meta_index(self, ifmap_id):
        # metas is a dict where key is meta-name and val is list of dict of
        # form [{'meta':meta}, {'id':id1, 'meta':meta}, {'id':id2, 'meta':meta}]
        metas = {}
        if ifmap_id in self._id_to_metas:
            metas = self._id_to_metas[ifmap_id].copy()
        return metas

    # end _object_read_to_meta_index

    def object_update(self, obj_cls, new_obj_dict):
        ifmap_id = get_ifmap_id_from_fq_name(obj_cls.resource_type,
                                             new_obj_dict['fq_name'])
        # read in refs from ifmap to determine which ones become inactive after update
        existing_metas = self._object_read_to_meta_index(ifmap_id)

        if not existing_metas:
            # The UPDATE notify was queued before the CREATE notify; skip publishing to IFMAP.
            return (True, '')

        # remove properties that are no longer active
        props = obj_cls.prop_field_metas
        for prop, meta in props.items():
            if meta in existing_metas and new_obj_dict.get(prop) is None:
                self._delete_id_self_meta(ifmap_id, meta)

        # remove refs that are no longer active
        delete_list = []
        refs = dict(
            (obj_cls.ref_field_metas[rf], obj_cls.ref_field_types[rf][0])
            for rf in obj_cls.ref_fields)
        #refs = {'virtual-network-qos-forwarding-class': 'qos-forwarding-class',
        #        'virtual-network-network-ipam': 'network-ipam',
        #        'virtual-network-network-policy': 'network-policy',
        #        'virtual-network-route-table': 'route-table'}
        for meta, ref_res_type in refs.items():
            old_set = set(existing_metas.get(meta, {}).keys())
            new_set = set()
            ref_obj_type = self._db_client_mgr.get_resource_class(
                ref_res_type).object_type
            for ref in new_obj_dict.get(ref_obj_type + '_refs', []):
                to_imid = get_ifmap_id_from_fq_name(ref_res_type, ref['to'])
                new_set.add(to_imid)

            for inact_ref in old_set - new_set:
                delete_list.append((inact_ref, meta))

        if delete_list:
            self._delete_id_pair_meta_list(ifmap_id, delete_list)

        (ok, result) = self.object_set(obj_cls, ifmap_id, existing_metas,
                                       new_obj_dict)
        return (ok, result)

    # end object_update

    def object_delete(self, obj_ids):
        ifmap_id = obj_ids['imid']
        parent_imid = obj_ids.get('parent_imid')
        existing_metas = self._object_read_to_meta_index(ifmap_id)
        meta_list = []
        for meta_name, meta_infos in existing_metas.items():
            # Delete all refs/links in the object.
            # Refs are identified when the key is a non-empty string.
            meta_list.extend([(k, meta_name) for k in meta_infos if k != ''])

        if parent_imid:
            # Remove link from parent
            meta_list.append((parent_imid, None))

        if meta_list:
            self._delete_id_pair_meta_list(ifmap_id, meta_list)

        # Remove all property metadata associated with this ident
        self._delete_id_self_meta(ifmap_id, None)

        return (True, '')

    # end object_delete

    def _init_conn(self):
        self._mapclient = client(
            ("%s" % (self._ifmap_srv_ip), "%s" % (self._ifmap_srv_port)),
            self._username, self._password, self._NAMESPACES,
            self._ssl_options)

        connected = False
        while not connected:
            try:
                resp_xml = self._mapclient.call('newSession',
                                                NewSessionRequest())
            except socket.error as e:
                msg = 'Failed to establish IF-MAP connection: %s' % str(e)
                self.config_log(msg, level=SandeshLevel.SYS_WARN)
                time.sleep(3)
                continue

            resp_doc = etree.parse(StringIO.StringIO(resp_xml))
            err_codes = resp_doc.xpath(
                '/env:Envelope/env:Body/ifmap:response/errorResult/@errorCode',
                namespaces=self._NAMESPACES)
            if not err_codes:
                connected = True
            else:
                msg = "Failed to establish IF-MAP connection: %s" % err_codes
                self.config_log(msg, level=SandeshLevel.SYS_WARN)
                session_id = self._mapclient.get_session_id()
                try:
                    self._mapclient.call('endSession',
                                         EndSessionRequest(session_id))
                except socket.error as e:
                    msg = "Failed to end the IF-MAP session %s: %s" %\
                          (session_id, str(e))
                    self.config_log(msg, level=SandeshLevel.SYS_WARN)
                time.sleep(3)

        ConnectionState.update(conn_type=ConnectionType.IFMAP,
                               name='IfMap',
                               status=ConnectionStatus.UP,
                               message='',
                               server_addrs=[
                                   "%s:%s" %
                                   (self._ifmap_srv_ip, self._ifmap_srv_port)
                               ])
        self._conn_state = ConnectionStatus.UP
        msg = 'IFMAP connection ESTABLISHED'
        self.config_log(msg, level=SandeshLevel.SYS_NOTICE)

        self._mapclient.set_session_id(
            newSessionResult(resp_xml).get_session_id())
        self._mapclient.set_publisher_id(
            newSessionResult(resp_xml).get_publisher_id())

    # end _init_conn

    def _get_api_server(self):
        return self._db_client_mgr._api_svr_mgr

    # end _get_api_server

    def reset(self):
        self._id_to_metas = {}
        while not self._queue.empty():
            self._queue.get_nowait()

        if (self._dequeue_greenlet is not None
                and gevent.getcurrent() != self._dequeue_greenlet):
            self._dequeue_greenlet.kill()
        self._dequeue_greenlet =\
              vnc_greenlets.VncGreenlet("VNC IfMap Dequeue",
                                        self._ifmap_dequeue_task)

    # end reset

    def _publish_config_root(self):
        # Remove all resident data
        result = ifmap_wipe(self._mapclient)
        if result is None:
            msg = "Cannot purge the IF-MAP server before publishing root graph"
            self.config_log(msg, level=SandeshLevel.SYS_WARN)
        # Build default config-root
        buf = cStringIO.StringIO()
        perms = Provision.defaults.perms
        perms.exportChildren(buf, level=1, pretty_print=False)
        id_perms_xml = buf.getvalue()
        buf.close()
        update = {}
        meta = Metadata(self._IPERMS_NAME,
                        '', {'ifmap-cardinality': 'singleValue'},
                        ns_prefix='contrail',
                        elements=id_perms_xml)
        self._update_id_self_meta(update, meta)
        self._publish_update("contrail:config-root:root", update)

    # end _publish_config_root

    def config_log(self, msg, level):
        self._db_client_mgr.config_log(msg, level)

    # end config_log

    @ignore_exceptions
    def _generate_ifmap_trace(self, oper, body):
        req_id = get_trace_id()
        ifmap_trace = IfmapTrace(request_id=req_id)
        ifmap_trace.operation = oper
        ifmap_trace.body = body

        return ifmap_trace

    # end _generate_ifmap_trace

    def _publish_to_ifmap_enqueue(self, oper, oper_body, do_trace=True):
        # Safety check: publishing an empty update|delete body makes the
        # ifmap-server report an error, so skip it
        if not oper_body:
            return
        self._queue.put((oper, oper_body, do_trace))

    # end _publish_to_ifmap_enqueue

    def _ifmap_dequeue_task(self):
        while True:
            try:
                self._publish_to_ifmap_dequeue()
            except Exception as e:
                tb = detailed_traceback()
                self.config_log(tb, level=SandeshLevel.SYS_ERR)

    def _publish_to_ifmap_dequeue(self):
        def _publish(requests, traces, publish_discovery=False):
            if not requests:
                return
            ok = False
            # Keep retrying the publish request until it succeeds.
            # When the queue is full, ifmap is fully resynced from the db.
            while not ok:
                ok, err_msg = self._publish_to_ifmap(''.join(requests))
                if ok:
                    trace_msg(traces, 'IfmapTraceBuf', self._sandesh)
                else:
                    trace_msg(traces,
                              'IfmapTraceBuf',
                              self._sandesh,
                              error_msg=err_msg)
                if publish_discovery and ok:
                    self._get_api_server().publish_ifmap_to_discovery()
                    self._is_ifmap_up = True
                if not ok:
                    msg = ("%s. IF-MAP sending queue size: %d/%d" %
                           (err_msg, self._queue.qsize(),
                            self._get_api_server()._args.ifmap_queue_size))
                    self.config_log(msg, level=SandeshLevel.SYS_WARN)
                    gevent.sleep(1)

        # end _publish

        while True:
            # block until there is data in the queue
            (oper, oper_body, do_trace) = self._queue.get()
            requests = []
            requests_len = 0
            traces = []
            while True:
                # drain the queue till empty or max message size
                # or change of oper because ifmap does not like
                # different operations in same message
                if oper == 'publish_discovery':
                    _publish(requests, traces, True)
                    break
                if do_trace:
                    trace = self._generate_ifmap_trace(oper, oper_body)
                    traces.append(trace)
                requests.append(oper_body)
                requests_len += len(oper_body)
                if (requests_len >
                        self._get_api_server()._args.ifmap_max_message_size):
                    _publish(requests, traces)
                    break
                old_oper = oper
                try:
                    (oper, oper_body, do_trace) = self._queue.get_nowait()
                    if oper != old_oper:
                        _publish(requests, traces)
                        requests = []
                        requests_len = 0
                        traces = []
                        continue
                except Empty:
                    _publish(requests, traces)
                    break

    # end _publish_to_ifmap_dequeue

    def _publish_to_ifmap(self, oper_body):
        try:
            not_published = True
            retry_count = 0
            resp_xml = None
            while not_published:
                sess_id = self._mapclient.get_session_id()
                req_xml = PublishRequest(sess_id, oper_body)
                resp_xml = self._mapclient.call('publish', req_xml)

                resp_doc = etree.parse(StringIO.StringIO(resp_xml))
                err_codes = resp_doc.xpath(
                    '/env:Envelope/env:Body/ifmap:response/errorResult/@errorCode',
                    namespaces=self._NAMESPACES)
                if err_codes:
                    if retry_count == 0:
                        log_str = 'Error publishing to ifmap, req: %s, resp: %s' \
                                  %(req_xml, resp_xml)
                        self.config_log(log_str, level=SandeshLevel.SYS_ERR)

                    ConnectionState.update(
                        conn_type=ConnectionType.IFMAP,
                        name='IfMap',
                        status=ConnectionStatus.INIT,
                        message='Session lost, renew it',
                        server_addrs=[
                            "%s:%s" %
                            (self._ifmap_srv_ip, self._ifmap_srv_port)
                        ])
                    self._conn_state = ConnectionStatus.INIT
                    self._is_ifmap_up = False
                    retry_count = retry_count + 1
                    self._init_conn()

                    if self._ifmap_restarted():
                        msg = "IF-MAP servers restarted, re-populate it"
                        self.config_log(msg, level=SandeshLevel.SYS_ERR)

                        self.reset()
                        self._get_api_server().publish_ifmap_to_discovery(
                            'down', msg)

                        self._publish_config_root()
                        self._db_client_mgr.db_resync()
                        self._publish_to_ifmap_enqueue('publish_discovery', 1)

                else:  # successful publish
                    not_published = False
                    break
            # end while not_published

            if retry_count:
                log_str = 'Success publishing to ifmap after %d tries' \
                          %(retry_count)
                self.config_log(log_str, level=SandeshLevel.SYS_ERR)

            return True, resp_xml
        except Exception as e:
            # Failed to publish the operation due to unknown error.
            # Probably a connection issue with the ifmap server.
            msg = "Failed to publish request %s: %s" % (oper_body, str(e))
            return False, msg

    # end _publish_to_ifmap

    def _build_request(self, id1_name, id2_name, meta_list, delete=False):
        request = ''
        id1 = unicode(
            Identity(name=id1_name, type="other", other_type="extended"))
        if id2_name != 'self':
            id2 = unicode(
                Identity(name=id2_name, type="other", other_type="extended"))
        else:
            id2 = None
        for m in meta_list:
            if delete:
                filter = unicode(m) if m else None
                op = PublishDeleteOperation(id1=id1, id2=id2, filter=filter)
            else:
                op = PublishUpdateOperation(id1=id1,
                                            id2=id2,
                                            metadata=unicode(m),
                                            lifetime='forever')
            request += unicode(op)
        return request

    def _delete_id_self_meta(self, self_imid, meta_name):
        contrail_metaname = 'contrail:' + meta_name if meta_name else None
        del_str = self._build_request(self_imid, 'self', [contrail_metaname],
                                      True)
        self._publish_to_ifmap_enqueue('delete', del_str)

        try:

            # del meta from cache and del id if this was last meta
            if meta_name:
                del self._id_to_metas[self_imid][meta_name]
                if not self._id_to_metas[self_imid]:
                    del self._id_to_metas[self_imid]
            else:
                del self._id_to_metas[self_imid]

        except KeyError:
            # Case of delete received for an id which we do not know about.
            # Could be a case of duplicate delete.
            # There is nothing for us to do here. Just log and proceed.
            msg = "Delete received for unknown imid(%s) meta_name(%s)." % \
                  (self_imid, meta_name)
            self.config_log(msg, level=SandeshLevel.SYS_DEBUG)

    # end _delete_id_self_meta

    def _delete_id_pair_meta_list(self, id1, meta_list):
        del_str = ''
        for id2, metadata in meta_list:
            contrail_metadata = 'contrail:' + metadata if metadata else None
            del_str += self._build_request(id1, id2, [contrail_metadata], True)

        self._publish_to_ifmap_enqueue('delete', del_str)

        # del meta,id2 from cache and del id if this was last meta
        def _id_to_metas_delete(id1, id2, meta_name):
            if id1 not in self._id_to_metas:
                return
            if meta_name not in self._id_to_metas[id1]:
                return
            if not self._id_to_metas[id1][meta_name]:
                del self._id_to_metas[id1][meta_name]
                if not self._id_to_metas[id1]:
                    del self._id_to_metas[id1]
                return

            # if meta is prop, noop
            if id2 in self._id_to_metas[id1][meta_name]:
                del self._id_to_metas[id1][meta_name][id2]

        #end _id_to_metas_delete

        for id2, metadata in meta_list:
            if metadata:
                # replace with remaining refs
                _id_to_metas_delete(id1, id2, metadata)
                _id_to_metas_delete(id2, id1, metadata)
            else:  # no meta specified remove all links from id1 to id2
                for meta_name in self._id_to_metas.get(id1, {}).keys():
                    _id_to_metas_delete(id1, id2, meta_name)
                for meta_name in self._id_to_metas.get(id2, {}).keys():
                    _id_to_metas_delete(id2, id1, meta_name)

    # end _delete_id_pair_meta_list

    def _update_id_self_meta(self, update, meta):
        """ update: dictionary of the type
                update[<id> | 'self'] = list(metadata)
        """
        mlist = update.setdefault('self', [])
        mlist.append(meta)

    # end _update_id_self_meta

    def _update_id_pair_meta(self, update, to_id, meta):
        mlist = update.setdefault(to_id, [])
        mlist.append(meta)

    # end _update_id_pair_meta

    def _publish_update(self, self_imid, update):
        requests = []
        self_metas = self._id_to_metas.setdefault(self_imid, {})
        for id2, metalist in update.items():
            request = self._build_request(self_imid, id2, metalist)

            # remember what we wrote for diffing during next update
            old_metalist = []
            for m in metalist:
                meta_name = m._Metadata__name[9:]  # strip the 'contrail:' prefix

                # Objects have two types of members - Props and refs/links.
                # Props are cached in id_to_metas as
                #        id_to_metas[self_imid][meta_name]['']
                #        (with empty string as key)

                # Links are cached in id_to_metas as
                #        id_to_metas[self_imid][meta_name][id2]
                #        id2 is used as a key

                if id2 == 'self':
                    self_metas[meta_name] = {'': m}
                    continue

                if meta_name in self_metas:
                    old_metalist.append(self_metas[meta_name])
                    # Update the link/ref
                    self_metas[meta_name][id2] = m
                else:
                    # Create a new link/ref
                    self_metas[meta_name] = {id2: m}

                # Reverse linking from id2 to id1
                self._id_to_metas.setdefault(id2, {})

                if meta_name in self._id_to_metas[id2]:
                    self._id_to_metas[id2][meta_name][self_imid] = m
                else:
                    self._id_to_metas[id2][meta_name] = {self_imid: m}

            old_request = self._build_request(self_imid, id2, old_metalist)
            if request != old_request:
                requests.append(request)

        upd_str = ''.join(requests)
        self._publish_to_ifmap_enqueue('update', upd_str)

    # end _publish_update

    def _ifmap_restarted(self):
        return not entity_is_present(self._mapclient, 'config-root', ['root'])

    def _health_checker(self):
        while True:
            try:
                # do the healthcheck only if we are connected
                if self._conn_state == ConnectionStatus.DOWN:
                    continue
                meta = Metadata('display-name',
                                '', {'ifmap-cardinality': 'singleValue'},
                                ns_prefix='contrail',
                                elements='')
                request_str = self._build_request('healthcheck', 'self',
                                                  [meta])
                self._publish_to_ifmap_enqueue('update',
                                               request_str,
                                               do_trace=False)

                # Confirm the existence of the following default global entities in IFMAP.
                search_list = [
                    ('global-system-config', ['default-global-system-config']),
                ]
                for type, fq_name in search_list:
                    if not entity_is_present(self._mapclient, type, fq_name):
                        raise Exception("%s not found in IFMAP DB" %
                                        ':'.join(fq_name))

                # If we had earlier unpublished the IFMAP server from the
                # discovery server, publish it back now that it is valid again.
                if not self._is_ifmap_up:
                    self._get_api_server().publish_ifmap_to_discovery('up', '')
                    self._is_ifmap_up = True
                    ConnectionState.update(
                        conn_type=ConnectionType.IFMAP,
                        name='IfMap',
                        status=ConnectionStatus.UP,
                        message='',
                        server_addrs=[
                            "%s:%s" %
                            (self._ifmap_srv_ip, self._ifmap_srv_port)
                        ])
            except Exception as e:
                log_str = 'IFMAP Healthcheck failed: %s' % (str(e))
                self.config_log(log_str, level=SandeshLevel.SYS_ERR)
                if self._is_ifmap_up:
                    self._get_api_server().publish_ifmap_to_discovery(
                        'down', 'IFMAP DB - Invalid state')
                    self._is_ifmap_up = False
                    ConnectionState.update(
                        conn_type=ConnectionType.IFMAP,
                        name='IfMap',
                        status=ConnectionStatus.DOWN,
                        message='Invalid IFMAP DB State',
                        server_addrs=[
                            "%s:%s" %
                            (self._ifmap_srv_ip, self._ifmap_srv_port)
                        ])
            finally:
                gevent.sleep(
                    self._get_api_server().get_ifmap_health_check_interval())
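# --- Generic sketch of the dequeue/batching pattern used in
# _publish_to_ifmap_dequeue above (not part of the original example). It blocks
# for the first item, then drains the queue with get_nowait() until it is empty
# or a size budget is exceeded, so many small publishes collapse into one
# request. Assumes `Queue` and `Empty` from gevent.queue, as in the example.
def drain_in_batches(queue, max_batch_bytes, publish):
    while True:
        item = queue.get()                 # block until there is work
        batch, batch_len = [item], len(item)
        while batch_len <= max_batch_bytes:
            try:
                item = queue.get_nowait()  # non-blocking drain
            except Empty:
                break
            batch.append(item)
            batch_len += len(item)
        publish(''.join(batch))            # one combined request per batch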
Beispiel #48
0
class CMSscan(object):
    def __init__(self, url):
        self.q = Queue()
        self.url = url.rstrip("/")
        fp = open(os.path.join(os.path.dirname(__file__), '..', 'data', 'data.json'),
                  'r',
                  encoding='gbk')
        webdata = json.load(fp)
        for i in webdata:
            self.q.put(i)
        fp.close()
        self.nums = "web指纹总数:%d" % len(webdata)
        # print("web指纹总数:%d"%len(webdata))

    def _GetMd5(self, body):
        md5 = hashlib.md5()
        # the response text is a str in Python 3; hashlib needs bytes
        md5.update(body.encode('utf-8'))
        return md5.hexdigest()

    def _clearQueue(self):
        while not self.q.empty():
            self.q.get()

    def _worker(self):
        data = self.q.get()
        scan_url = self.url + data["url"]
        try:
            r = requests.get(scan_url, timeout=20)
            if (r.status_code != 200):
                return
            rtext = r.text
            if rtext is None:
                return
        except:
            rtext = ''

        if data["re"]:
            if (rtext.find(data["re"]) != -1):
                result = data["name"]
                # print("CMS:%s 判定位置:%s 正则匹配:%s" % (result, scan_url, data["re"]))
                self.resultout = "CMS:%s 判定位置:%s 正则匹配:%s" % (result, scan_url,
                                                             data["re"])
                self._clearQueue()
                return True
        else:
            md5 = self._GetMd5(rtext)
            if (md5 == data["md5"]):
                result = data["name"]
                # print("CMS:%s 判定位置:%s md5:%s" % (result, scan_url, data["md5"]))
                self.resultout = "CMS:%s 判定位置:%s md5:%s" % (result, scan_url,
                                                            data["md5"])
                self._clearQueue()
                return True

    def _boss(self):
        while not self.q.empty():
            self._worker()

    def outputdatalen(self):
        return self.nums

    def outputreuslt(self):
        return self.resultout

    def runtime(self, maxsize=100):
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        allr = [gevent.spawn(self._boss) for i in range(maxsize)]
        gevent.joinall(allr)
        end = time.perf_counter()
        # print("elapsed: %f s" % (end - start))
        self.timeout = "elapsed: %f s" % (end - start)
        return self.timeout
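# --- Hypothetical usage sketch (not part of the original example). It assumes
# the ../data/data.json fingerprint file loaded in __init__ is present.
def _scan_site(url):
    scanner = CMSscan(url)
    print(scanner.outputdatalen())        # total number of loaded fingerprints
    print(scanner.runtime(maxsize=100))   # spawns 100 greenlets draining the queue
    # outputreuslt() (sic) is only set once a fingerprint has matched
    return scanner.outputreuslt()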
Beispiel #49
0
class gwhatweb(object):
    def __init__(self, url):
        self.tasks = Queue()
        self.url = url.rstrip("/")
        fp = open('file/data.json')
        webdata = json.load(fp, encoding="utf-8")
        for i in webdata:
            self.tasks.put(i)
        fp.close()
        print("webdata total:%d" % len(webdata))

    def _GetMd5(self, body):
        m2 = hashlib.md5()
        m2.update(body)
        return m2.hexdigest()

    def _clearQueue(self):
        while not self.tasks.empty():
            self.tasks.get()

    def _worker(self):
        data = self.tasks.get()
        test_url = self.url + data["url"]
        f2 = open('Cms_scan.txt', 'r+')
        f2.truncate()
        rtext = ''
        try:
            r = requests.get(test_url, timeout=10)
            if (r.status_code != 200):
                return
            rtext = r.text
            if rtext is None:
                return
        except:
            rtext = ''

        if data["re"]:
            if (rtext.find(data["re"]) != -1):
                result = data["name"]
                print("CMS:%s Judge:%s re:%s" % (result, test_url, data["re"]))
                f2.write((result + " " + test_url + " " +
                          data["re"]).encode('utf-8'))
                f2.close()
                self._clearQueue()
                return True
        else:
            md5 = self._GetMd5(rtext)
            if (md5 == data["md5"]):
                result = data["name"]
                print("CMS:%s Judge:%s md5:%s" %
                      (result, test_url, data["md5"]))
                f2.write((result + " "
                          "+test_url+"
                          "" + data["md5"]).encode('utf-8'))
                f2.close()
                self._clearQueue()
                return True

    def _boss(self):
        while not self.tasks.empty():
            self._worker()

    def whatweb(self, maxsize=100):
        start = time.clock()
        allr = [gevent.spawn(self._boss) for i in range(maxsize)]
        gevent.joinall(allr)
        end = time.clock()
        print("cost: %f s" % (end - start))
class Command(collectstatic.Command):
    """
    This command extends Django's `collectstatic` with a `--faster` argument for parallel file copying using gevent.
    The speed improvement is especially helpful for remote storage backends like S3.
    """
    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self.counter = 0
        self.task_queue = None
        self.worker_spawn_method = None
        self.use_multiprocessing = False
        self.found_files = OrderedDict()

    def add_arguments(self, parser):
        super(Command, self).add_arguments(parser)
        parser.add_argument('--faster',
                            action='store_true',
                            default=False,
                            help='Collect static files simultaneously')
        parser.add_argument('--workers',
                            action='store',
                            default=20,
                            help='Amount of simultaneous workers (default=20)')
        parser.add_argument(
            '--use-multiprocessing',
            action='store_true',
            default=False,
            help='Use multiprocessing library instead of gevent')

    def set_options(self, **options):
        self.faster = options.pop('faster')
        self.queue_worker_amount = int(options.pop('workers'))
        self.use_multiprocessing = options.pop('use_multiprocessing')

        if self.use_multiprocessing:
            self.task_queue = multiprocessing.JoinableQueue()
            self.worker_spawn_method = self.mp_spawn
        else:
            self.task_queue = GeventQueue()
            self.worker_spawn_method = self.gevent_spawn

        super(Command, self).set_options(**options)

        if self.faster:
            # The original management command of Django collects all the files and calls the post_process method of
            # the storage backend within the same method. Because we are using a task queue, post processing is started
            # before all files were collected.
            self.post_process_original = self.post_process
            self.post_process = False

    def handle(self, **options):
        start_time = time.time()
        super(Command, self).handle(**options)
        self.log('%s static files copied asynchronously in %is.' %
                 (self.counter, time.time() - start_time),
                 level=1)

    def copy_file(self, path, prefixed_path, source_storage):
        self.file_handler('copy', path, prefixed_path, source_storage)

    def link_file(self, path, prefixed_path, source_storage):
        self.file_handler('link', path, prefixed_path, source_storage)

    def file_handler(self, handler_type, path, prefixed_path, source_storage):
        """
        Create a dict with all kwargs of the `copy_file` or `link_file` method of the super class and add it to
        the queue for later processing.
        """
        if self.faster:
            if prefixed_path not in self.found_files:
                self.found_files[prefixed_path] = (source_storage, path)

            self.task_queue.put({
                'handler_type': handler_type,
                'path': path,
                'prefixed_path': prefixed_path,
                'source_storage': source_storage
            })
            self.counter += 1
        else:
            if handler_type == 'link':
                super(Command, self).link_file(path, prefixed_path,
                                               source_storage)
            else:
                super(Command, self).copy_file(path, prefixed_path,
                                               source_storage)

    def delete_file(self, path, prefixed_path, source_storage):
        """
        We can skip the file-existence checks because every file is overwritten anyway.
        """
        if self.faster:
            return True
        else:
            return super(Command, self).delete_file(path, prefixed_path,
                                                    source_storage)

    def collect(self):
        """
        Create some concurrent workers that process the tasks simultaneously.
        """
        collected = super(Command, self).collect()
        if self.faster:
            self.worker_spawn_method()
            self.post_processor()
        return collected

    def post_processor(self):
        # Here we check if the storage backend has a post_process
        # method and pass it the list of modified files.
        if self.post_process_original and hasattr(self.storage,
                                                  'post_process'):
            processor = self.storage.post_process(self.found_files,
                                                  dry_run=self.dry_run)
            for original_path, processed_path, processed in processor:
                if isinstance(processed, Exception):
                    self.stderr.write("Post-processing '%s' failed!" %
                                      original_path)
                    # Add a blank line before the traceback, otherwise it's
                    # too easy to miss the relevant part of the error message.
                    self.stderr.write("")
                    raise processed
                if processed:
                    self.log("Post-processed '%s' as '%s'" %
                             (original_path, processed_path),
                             level=1)
                    self.post_processed_files.append(original_path)
                else:
                    self.log("Skipped post-processing '%s'" % original_path)

    def gevent_spawn(self):
        """ Spawn worker threads (using gevent) """
        monkey.patch_all(thread=False)
        joinall([
            spawn(self.gevent_worker) for x in range(self.queue_worker_amount)
        ])

    def gevent_worker(self):
        """
        Process one task after another by calling the handler method (`copy_file` or `link_file`) of the super class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

    def mp_spawn(self):
        """ Spawn worker processes (using multiprocessing) """
        processes = []
        for x in range(self.queue_worker_amount):
            process = multiprocessing.Process(target=self.mp_worker)
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

    def mp_worker(self):
        """
        Process one task after another by calling the handler method (`copy_file` or `link_file`) of the super class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

            self.task_queue.task_done()
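A possible invocation, assuming the module above is installed as the project's `collectstatic` override (the app and module path are not shown in the original source):

    python manage.py collectstatic --noinput --faster --workers 30
    python manage.py collectstatic --noinput --faster --use-multiprocessing

With `--faster`, files are only queued during `collect()`; the actual copying and the storage backend's post-processing run once the workers have been spawned.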
Beispiel #51
0
class ClientPool(object):
    """Base Interface for Gevent-coroutine based DBAPI2 connection pooling.

    The implementation uses gevent's queueing mechanism to ensure that a
    DB task is never claimed by more than one greenlet.


    Attributes:
        maxsize (int): Greenlet pool size.
    """
    def __init__(self, maxsize=20):
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        self.size = 0

    def create_connection(self):
        raise NotImplementedError("Must implement `create_connection` method.")

    def get(self):
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except:
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        self.pool.put(item)

    def closeall(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" % conn)
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        isolation_level = kwargs.pop('isolation_level', None)
        with self.connection(isolation_level) as conn:
            yield conn.cursor(*args, **kwargs)

    def _rollback(self, conn):
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item

    def query(self, query, fetch_opts='many', cursor_type='RealDictCursor'):
        try:
            return getattr(self,
                           dict(CURSOR_FETCH).get(fetch_opts))(
                               *(query, ),
                               cursor_factory=getattr(psycopg2.extras,
                                                      cursor_type))
        except Exception as e:
            raise DBPoolError(e.args)
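A minimal sketch of a concrete pool, assuming psycopg2 and a reachable database; the `PsycopgPool` name and the `dsn` parameter are illustrative and not part of the original class:

import psycopg2

class PsycopgPool(ClientPool):
    def __init__(self, dsn, maxsize=20):
        super(PsycopgPool, self).__init__(maxsize=maxsize)
        self.dsn = dsn

    def create_connection(self):
        # Called lazily by get() until `maxsize` connections exist.
        return psycopg2.connect(self.dsn)

pool = PsycopgPool('dbname=test user=postgres', maxsize=10)
with pool.cursor() as cur:
    cur.execute('SELECT 1')
    print(cur.fetchone())
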
Beispiel #52
0
class DatabaseConnectionPool(object):
    def __init__(self, maxsize=100, debug=False):
        if not isinstance(maxsize, (int, long)):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self.maxsize = maxsize
        self.pool = Queue()
        self.size = 0
        self.debug = debug

    def get(self):
        pool = self.pool
        if self.size >= self.maxsize or pool.qsize():
            return pool.get()
        else:
            self.size += 1
            try:
                new_item = self.create_connection()
            except:
                self.size -= 1
                raise
            return new_item

    def put(self, item):
        self.pool.put(item)

    def closeall(self):
        while not self.pool.empty():
            conn = self.pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass

    @contextlib.contextmanager
    def connection(self):
        conn = self.get()
        try:
            yield conn
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        conn = self.get()
        try:
            yield conn.cursor(*args, **kwargs)
        except:
            if conn.closed:
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                self.put(conn)

    def _rollback(self, conn):
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
                try:
                    log.debug(u'execute %s %s' % \
                              (t, cursor.mogrify(*args, **kwargs)))
                except:
                    pass

    def executemany(self, *args, **kwargs):
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.executemany(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
                try:
                    log.debug(u'executemany %s %s' % \
                              (t, cursor.mogrify(*args, **kwargs)))
                except:
                    pass

    def fetchone(self, *args, **kwargs):
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = '%.3f' % (time.time() - t1)
                try:
                    log.debug(u'fetchone %s %s' % \
                              (t, cursor.mogrify(*args, **kwargs)))
                except:
                    pass
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        with self.cursor() as cursor:
            t = ''
            if self.debug:
                t1 = time.time()
            cursor.execute(*args, **kwargs)
            if self.debug:
                t = ' %.3f' % (time.time() - t1)
                try:
                    log.debug(u'fetchall %s %s' %
                              (t, cursor.mogrify(*args, **kwargs)))
                except:
                    pass
            return cursor.fetchall()
Beispiel #53
0
class WorkQueue(object):

    _MAX_QUEUE_SIZE = 1024
    _MAX_WORKLOAD = 16

    def __init__(self,
                 worker,
                 start_runner=None,
                 max_qsize=None,
                 max_work_load=None):
        self.worker = worker
        self._start_runner = start_runner
        self._max_qsize = max_qsize or WorkQueue._MAX_QUEUE_SIZE
        self._max_work_load = max_work_load or WorkQueue._MAX_WORKLOAD
        self._bounded = False
        self._queue = Queue()
        self._qsize = 0
        self._num_enqueues = 0
        self._num_dequeues = 0
        self._drops = 0
        self._high_watermarks = None
        self._low_watermarks = None
        self._hwm_index = -1
        self._lwm_index = -1
        self._runner = Runner(self, self._max_work_load)
        self._max_qlen = 0

    # end __init__

    def set_bounded(self, bounded):
        self._bounded = bounded

    # end set_bounded

    def bounded(self):
        return self._bounded

    # end bounded

    def set_high_watermarks(self, high_wm):
        # weed out duplicates and store the watermarks in sorted order
        self._high_watermarks = list(sorted(set(high_wm)))
        self._set_watermark_indices(-1, -1)

    # end set_high_watermarks

    def high_watermarks(self):
        return self._high_watermarks

    # end high_watermarks

    def set_low_watermarks(self, low_wm):
        # weed out duplicates and store the watermarks in sorted order
        self._low_watermarks = list(sorted(set(low_wm)))
        self._set_watermark_indices(-1, -1)

    # end set_low_watermarks

    def low_watermarks(self):
        return self._low_watermarks

    # end low_watermarks

    def watermark_indices(self):
        return self._hwm_index, self._lwm_index

    # end watermark_indices

    def enqueue(self, work_item):
        if self.increment_queue_size(work_item) > self._max_qlen:
            self._max_qlen = self._qsize
        if self._bounded:
            if self._qsize > self._max_qsize:
                self.decrement_queue_size(work_item)
                self._max_qlen = self._qsize
                self._drops += 1
                return False
        self._num_enqueues += 1
        self._process_high_watermarks()
        self._queue.put(work_item)
        self.may_be_start_runner()
        return True

    # end enqueue

    def dequeue(self):
        try:
            work_item = self._queue.get_nowait()
        except Empty:
            work_item = None
        else:
            self.decrement_queue_size(work_item)
            self._num_dequeues += 1
            self._process_low_watermarks()
        return work_item

    # end dequeue

    def increment_queue_size(self, work_item):
        self._qsize += 1
        return self._qsize

    # end increment_queue_size

    def decrement_queue_size(self, work_item):
        self._qsize -= 1

    # end decrement_queue_size

    def size(self):
        return self._qsize

    # end size

    def max_qlen(self):
        return self._max_qlen

    def may_be_start_runner(self):
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return
        self._runner.start()

    # end may_be_start_runner

    def runner_done(self):
        if self._queue.empty() or \
           (self._start_runner and not self._start_runner()):
            return True
        return False

    # end runner_done

    def is_queue_empty(self):
        if self._queue.empty():
            return True
        return False

    # end is_queue_empty

    def num_enqueues(self):
        return self._num_enqueues

    # end num_enqueues

    def num_dequeues(self):
        return self._num_dequeues

    # end num_dequeues

    def drops(self):
        return self._drops

    # end drops

    def runner(self):
        return self._runner

    # end runner

    def _set_watermark_indices(self, hwm_index, lwm_index):
        self._hwm_index = hwm_index
        self._lwm_index = lwm_index

    # end _set_watermark_indices

    def _process_high_watermarks(self):
        if not self._high_watermarks:
            return
        # Check if we have crossed any high watermarks.
        # Find the index of the first element greater than self._qsize
        # in self._high_watermarks.
        index = bisect.bisect_right(self._high_watermarks,
                                    WaterMark(self._qsize, None))
        # If the first element > qsize, then we have not crossed any
        # high watermark.
        if index == 0:
            return
        # We have crossed (index-1)th watermark in the list.
        hwm_index = index - 1
        if hwm_index == self._hwm_index:
            return
        self._set_watermark_indices(hwm_index, hwm_index + 1)
        # Now invoke the watermark callback
        self._high_watermarks[self._hwm_index].callback(self._qsize)

    # end _process_high_watermarks

    def _process_low_watermarks(self):
        if not self._low_watermarks:
            return
        # Check if we have crossed any low watermarks.
        # Find the index of the first element not less than self._qsize
        # in self._low_watermarks.
        index = bisect.bisect_left(self._low_watermarks,
                                   WaterMark(self._qsize, None))
        # If there is no element >= qsize, then we have not crossed any
        # low watermark.
        if index == len(self._low_watermarks):
            return
        lwm_index = index
        if lwm_index == self._lwm_index:
            return
        self._set_watermark_indices(lwm_index - 1, lwm_index)
        # Now invoke the watermark callback
        self._low_watermarks[self._lwm_index].callback(self._qsize)
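A sketch of how the watermark hooks might be wired up, assuming a `WorkQueue` instance `wq` has already been constructed (its `Runner` dependency is not shown here) and that `WaterMark` is an orderable `(size, callback)` pair, which matches how `_process_high_watermarks()` compares it with `bisect`:

import collections

# Assumption: WaterMark sorts by its first field, as required by the bisect
# calls in _process_high_watermarks() / _process_low_watermarks().
WaterMark = collections.namedtuple('WaterMark', ['size', 'callback'])

def on_high(qsize):
    print('crossed a high watermark, qsize=%d' % qsize)

def on_low(qsize):
    print('dropped below a low watermark, qsize=%d' % qsize)

wq.set_bounded(True)  # enqueue() starts dropping items once _max_qsize is exceeded
wq.set_high_watermarks([WaterMark(512, on_high), WaterMark(768, on_high)])
wq.set_low_watermarks([WaterMark(64, on_low), WaterMark(128, on_low)])
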
class Illust_download(Pixiv_Login):
    # def __init__(self):
    #     super(Illust_download, self).__init__()
    #     self.work_1 = Queue()
    #     self.list_photo = []
    #     self.tasks_list_1 = []
    #     self.work_2 = Queue()
    #     self.tasks_list_2 = []
    #     self.work_2_num = 0

    def illustID(self, ides):
        id_list = []
        self.work_1 = Queue()
        if type(ides) == str:
            ides = ides.replace('，', ',')  # normalize full-width commas before splitting
            id_list = ides.split(',')
        elif type(ides) == list:
            id_list = ides
        for id in id_list:
            # print(id)
            self.work_1.put_nowait(id)

    def illust_info(self):
        self.list_photo = []
        while not self.work_1.empty():
            id_photo = self.work_1.get_nowait()
            url_works = 'https://www.pixiv.net/ajax/illust/{}/pages?lang=zh'.format(
                id_photo)
            headers_works = {
                'referer':
                'https://www.pixiv.net/artworks/{}'.format(id_photo),
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_works = self.session.get(url_works, headers=headers_works)
            print(res_works.status_code)
            json_works = res_works.json()
            for body_works in json_works['body']:
                url_original = body_works['urls']['original']
                self.list_photo.append([url_original, id_photo])

    def get_illust(self):
        self.tasks_list_1 = []
        for reptile in range(10):
            task_1 = gevent.spawn(self.illust_info)
            self.tasks_list_1.append(task_1)
        gevent.joinall(self.tasks_list_1)

    def urls(self):
        self.work_2 = Queue()
        for urls_photo in self.list_photo:
            self.work_2.put_nowait(urls_photo)
        self.work_2_nums = self.work_2.qsize()
        PP.ui.progressBar.setRange(0, self.work_2_nums)

    def download(self, path):
        while not self.work_2.empty():
            urls_photo = self.work_2.get_nowait()
            headers_photo = {
                'referer':
                'https://www.pixiv.net/artworks/{}'.format(urls_photo[1]),
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_photo = self.session.get(urls_photo[0], headers=headers_photo)
            print(res_photo.status_code)
            with open(
                    '{0}/{1}'.format(
                        path, urls_photo[0].replace(
                            'https://i.pximg.net/img-original/img/',
                            '').replace('/', '_')), 'wb') as photo:
                photo.write(res_photo.content)
            self.work_2_num = self.work_2_num + 1
            MS.text_print.emit(PP.ui.textBrowser,
                               '第{}张插画……下载成功'.format(self.work_2_num))
            MS.progress_update.emit(self.work_2_num)

    def run_download(self, path):
        self.tasks_list_2 = []
        self.work_2_num = 0
        MS.progress_update.emit(self.work_2_num)
        for reptile in range(5):
            task_2 = gevent.spawn(self.download, path)
            self.tasks_list_2.append(task_2)
        gevent.joinall(self.tasks_list_2)

    def illust_download(self, path):
        def thread_illust_download(path):
            self.__init__()
            self.Censor_cookies()
            self.get_illust()
            self.urls()
            self.run_download(path)

        thread = Thread(target=thread_illust_download, args=(path, ))
        # thread.setDaemon(True)
        thread.start()

    def author_illust(self, authorID):
        illust_list = []
        id_author = authorID
        url_authorHome = 'https://www.pixiv.net/users/{}'.format(id_author)
        url_author = 'https://www.pixiv.net/ajax/user/{}/profile/all?lang=zh'.format(
            id_author)
        # headers_authorHome = {
        #     'referer': 'https://www.pixiv.net/users/{}/following'.format(id_author),
        #     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
        # }
        headers_author = {
            'referer':
            'https://www.pixiv.net/users/{}'.format(id_author),
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
        }
        res_authorHome = self.session.get(url_authorHome,
                                          headers=headers_author)
        res_author = self.session.get(url_author, headers=headers_author)
        print(res_authorHome.status_code, res_author.status_code)
        json_author = res_author.json()
        illusts = json_author['body']['illusts']
        soup_authorHome = BeautifulSoup(res_authorHome.text, 'html.parser')
        json_content = soup_authorHome.find(id='meta-preload-data')['content']
        self.userName = json.loads(json_content)['user'][id_author]['name']
        illust_num = len(illusts)
        MS.text_print.emit(PP.ui.textBrowser,
                           '画师{0}共有{1}幅作品'.format(self.userName, illust_num))
        # print('画师{0}共有{1}幅作品'.format(userName, illust_num))
        for illust in illusts:
            illust_list.append(illust)
        PD.illustID(illust_list)

    def Author_iIllust(self, authorID, path):
        def work_Author_iIllust(authorID, path):
            self.__init__()
            self.Censor_cookies()
            self.author_illust(authorID)
            self.get_illust()
            self.urls()
            path = path + '\\' + self.userName
            os.mkdir(path)
            self.run_download(path)

        thread_Author_iIllust = Thread(target=work_Author_iIllust,
                                       args=(
                                           authorID,
                                           path,
                                       ))
        # thread_Author_iIllust.setDaemon(True)
        thread_Author_iIllust.start()

    def Collect_page(self, page_num):
        self.work_3 = Queue()
        self.list_id_photo = []
        page = page_num
        for i in range(1, page + 1):
            self.work_3.put_nowait(str(i))

    def Collection(self):
        while not self.work_3.empty():
            page = self.work_3.get_nowait()
            url_collection = "https://www.pixiv.net/bookmark.php?rest=show&p={}".format(
                page)
            headers = {
                'referer':
                'https://accounts.pixiv.net/login',
                'origin':
                'https://accounts.pixiv.net',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
            }
            res_collection = self.session.get(url_collection, headers=headers)
            print(res_collection.status_code)
            html_collection = res_collection.text
            soup_collection = BeautifulSoup(html_collection, 'html.parser')
            list_collection = soup_collection.find_all(class_='image-item')
            # print(list_collection)
            for collection in list_collection:
                id_collection = collection.find(class_='ui-scroll-view')
                id_photo = id_collection['data-id']
                self.list_id_photo.append(id_photo)

    def Collect_iIllust(self, path):
        def work_Collect_iIllust(path):
            self.__init__()
            self.Censor_cookies()
            self.Collection()
            self.illustID(self.list_id_photo)
            self.get_illust()
            self.urls()
            self.run_download(path)

        thread_Collect_iIllust = Thread(target=work_Collect_iIllust,
                                        args=(path, ))
        thread_Collect_iIllust.start()
Beispiel #55
0
class Actor(object):
    """
    Class that contains a queue and a greenlet serving that queue.
    """

    max_ops_before_yield = 10000
    """Number of calls to self._maybe_yield before it yields"""
    def __init__(self, qualifier=None):
        self._event_queue = Queue()
        self.greenlet = gevent.Greenlet(self._loop)
        self._op_count = 0
        self._current_msg = None
        self.started = False

        # Message being processed; purely for logging.
        self.msg_uuid = None

        # Logging parameters
        self.qualifier = qualifier
        if qualifier:
            self.name = "%s(%s)" % (self.__class__.__name__, qualifier)
        else:
            self.name = self.__class__.__name__
        # Can't use str(self) yet, it might not be ready until subclass
        # constructed.
        _log.info("%s created.", self.name)

    def start(self):
        assert not self.greenlet, "Already running"
        _log.info("Starting %s", self)
        self.started = True
        self.greenlet.start()
        return self

    def _loop(self):
        """
        Main greenlet loop, repeatedly runs _step().  Doesn't return normally.
        """
        actor_storage.class_name = self.__class__.__name__
        actor_storage.name = self.name
        actor_storage.msg_uuid = None

        try:
            while True:
                self._step()
        except:
            _log.exception("Exception killed %s", self)
            raise

    def _step(self):
        """
        Run one iteration of the event loop for this actor.  Mainly
        broken out to allow the UTs to single-step an Actor.

        It also has the beneficial side effect of introducing a new local
        scope so that our variables die before we block next time.
        """
        # Block waiting for work.
        msg = self._event_queue.get()

        batch = [msg]
        batches = []

        if not msg.needs_own_batch:
            # Try to pull some more work off the queue to combine into a
            # batch.
            while not self._event_queue.empty():
                # We're the only ones getting from the queue so this should
                # never fail.
                msg = self._event_queue.get_nowait()
                if msg.needs_own_batch:
                    if batch:
                        batches.append(batch)
                    batches.append([msg])
                    batch = []
                else:
                    batch.append(msg)
        if batch:
            batches.append(batch)

        num_splits = 0
        while batches:
            # Process the first batch on our queue of batches.  Invariant:
            # we'll either process this batch to completion and discard it or
            # we'll put all the messages back into the batch queue in the same
            # order but with a first batch that is half the size and the
            # rest of its messages in the second batch.
            batch = batches.pop(0)
            # Give subclass a chance to filter the batch/update its state.
            batch = self._start_msg_batch(batch)
            assert batch is not None, "_start_msg_batch() should return batch."
            results = []  # Will end up same length as batch.
            for msg in batch:
                _log.debug("Message %s recd by %s from %s, queue length %d",
                           msg, msg.recipient, msg.caller,
                           self._event_queue.qsize())
                self._current_msg = msg
                actor_storage.msg_uuid = msg.uuid
                actor_storage.msg_name = msg.name
                try:
                    # Actually execute the per-message method and record its
                    # result.
                    result = msg.method()
                except BaseException as e:
                    _log.exception("Exception processing %s", msg)
                    results.append(ResultOrExc(None, e))
                    _stats.increment("Messages executed with exception")
                else:
                    results.append(ResultOrExc(result, None))
                    _stats.increment("Messages executed OK")
                finally:
                    self._current_msg = None
                    actor_storage.msg_uuid = None
                    actor_storage.msg_name = None
            try:
                # Give subclass a chance to post-process the batch.
                _log.debug("Finishing message batch")
                actor_storage.msg_name = "<finish batch>"
                self._finish_msg_batch(batch, results)
            except SplitBatchAndRetry:
                # The subclass couldn't process the batch as is (probably
                # because a failure occurred and it couldn't figure out which
                # message caused the problem).  Split the batch into two and
                # re-run it.
                _log.warn("Splitting batch to retry.")
                self.__split_batch(batch, batches)
                num_splits += 1  # For diags.
                _stats.increment("Split batches")
                continue
            except BaseException as e:
                # Most-likely a bug.  Report failure to all callers.
                _log.exception("_finish_msg_batch failed.")
                results = [(None, e)] * len(results)
                _stats.increment("_finish_msg_batch() exception")
            finally:
                actor_storage.msg_name = None

            # Batch complete and finalized, set all the results.
            assert len(batch) == len(results)
            for msg, (result, exc) in zip(batch, results):
                for future in msg.results:
                    if exc is not None:
                        future.set_exception(exc)
                    else:
                        future.set(result)
                    _stats.increment("Messages completed")

            _stats.increment("Batches processed")
        if num_splits > 0:
            _log.warn("Split batches complete. Number of splits: %s",
                      num_splits)

    @staticmethod
    def __split_batch(current_batch, remaining_batches):
        """
        Splits batch in half and prepends it to the list of remaining
        batches. Modifies remaining_batches in-place.

        :param list[Message] current_batch: list of messages that's currently
               being processed.
        :param list[list[Message]] remaining_batches: list of batches
               still to process.
        """
        assert len(current_batch) > 1, "Batch too small to split"
        # Split the batch.
        split_point = len(current_batch) // 2
        _log.debug("Split-point = %s", split_point)
        first_half = current_batch[:split_point]
        second_half = current_batch[split_point:]
        if remaining_batches and not remaining_batches[0][0].needs_own_batch:
            # Optimization: there's another batch already queued and
            # it also contains batchable messages, so push the second
            # half of this batch onto the front of that one.
            _log.debug("Split batch and found a subsequent batch, "
                       "coalescing with that.")
            next_batch = remaining_batches[0]
            next_batch[:0] = second_half
        else:
            _log.debug("Split batch but cannot prepend to next batch, adding "
                       "both splits to start of queue.")
            remaining_batches[:0] = [second_half]
        remaining_batches[:0] = [first_half]

    def _start_msg_batch(self, batch):
        """
        Called before processing a batch of messages to give subclasses
        a chance to filter the batch.  Implementations must ensure that
        every AsyncResult in the batch is correctly set.  Usually, that
        means combining them into one list.

        It is usually easier to build up a batch of changes to make in the
        @actor_message-decorated methods and then process them in
        _finish_msg_batch().

        Intended to be overridden.  This implementation simply returns the
        input batch.

        :param list[Message] batch:
        """
        return batch

    def _finish_msg_batch(self, batch, results):
        """
        Called after a batch of events have been processed from the queue
        before results are set.

        Intended to be overridden.  This implementation does nothing.

        Exceptions raised by this method are propagated to all messages in the
        batch, overriding the existing results.  It is recommended that the
        implementation catches appropriate exceptions and maps them back
        to the correct entry in results.

        :param list[ResultOrExc] results: Pairs of (result, exception)
            representing the result of each message-processing function.
            Only one of the values is set.  Updates to the list alter the
            result sent to any waiting listeners.
        :param list[Message] batch: The input batch, always the same length as
            results.
        """
        pass

    def _maybe_yield(self):
        """
        With some probability, yields processing to another greenlet.
        (Utility method to be called from the actor's greenlet during
        long-running operations.)
        """
        self._op_count += 1
        if self._op_count >= self.max_ops_before_yield:
            gevent.sleep()
            self._op_count = 0

    def __str__(self):
        return self.__class__.__name__ + "<%s,queue_len=%s,live=%s,msg=%s>" % (
            self.qualifier, self._event_queue.qsize(), bool(
                self.greenlet), self._current_msg)
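The batching trick at the top of `_step()` (block for the first message, then opportunistically drain whatever else is already queued) is worth seeing in isolation. A minimal sketch, independent of the Actor machinery above:

from gevent.queue import Queue, Empty

def next_batch(event_queue):
    """Block for the first item, then drain the rest without blocking."""
    batch = [event_queue.get()]          # blocks until at least one item arrives
    while True:
        try:
            batch.append(event_queue.get_nowait())
        except Empty:
            return batch

Handling messages per batch like this lets `_finish_msg_batch()` do one expensive flush per batch instead of one per message, which is the point of the Actor's batch/split machinery.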
class ArchivariusBridge(object):
    """Archivarius Bridge"""
    def __init__(self, config):
        self.config = config
        self.workers_config = {}
        self.log_dict = {}
        self.bridge_id = uuid.uuid4().hex
        self.api_host = self.config_get('resources_api_server')
        self.api_version = self.config_get('resources_api_version')

        # Workers settings
        for key in WORKER_CONFIG:
            self.workers_config[key] = (self.config_get(key)
                                        or WORKER_CONFIG[key])

        # Init config
        for key in DEFAULTS:
            value = self.config_get(key)
            setattr(self, key,
                    type(DEFAULTS[key])(value) if value else DEFAULTS[key])

        # Pools
        self.workers_pool = Pool(self.workers_max)
        self.retry_workers_pool = Pool(self.retry_workers_max)
        self.filter_workers_pool = Pool()

        # Queues
        self.api_clients_queue = Queue()
        if self.resource_items_queue_size == -1:
            self.resource_items_queue = Queue()
        else:
            self.resource_items_queue = Queue(self.resource_items_queue_size)
        if self.retry_resource_items_queue_size == -1:
            self.retry_resource_items_queue = Queue()
        else:
            self.retry_resource_items_queue = Queue(
                self.retry_resource_items_queue_size)

        # Default values for statistic variables
        for key in (
                'droped',
                'add_to_resource_items_queue',
                'add_to_retry',
                'exceptions_count',
                'not_found_count',
                'archived',
                'moved_to_public_archive',
                'dumped_to_secret_archive',
        ):
            self.log_dict[key] = 0

        if self.api_host != '' and self.api_host is not None:
            api_host = urlparse(self.api_host)
            if api_host.scheme == '' and api_host.netloc == '':
                raise ConfigError('Invalid \'resources_api_server\' url.')
        else:
            raise ConfigError('\'resources_api_server\' is empty or missing'
                              ' in the config dictionary')
        self.db = prepare_couchdb(self.couch_url, self.db_name, logger)
        self.archive_db = prepare_couchdb(self.couch_url, self.db_archive_name,
                                          logger)
        # TODO
        self.archive_db2 = prepare_couchdb(self.couch_url,
                                           self.db_archive_name + '_secret',
                                           logger)

        self.resources = {}
        for entry_point in iter_entry_points(
                'openprocurement.archivarius.resources'):
            self.resources[entry_point.name] = {
                'filter':
                entry_point.load(),
                'view_path':
                '_design/{}/_view/by_dateModified'.format(entry_point.name)
            }

    def create_api_client(self):
        client_user_agent = (self.user_agent + '/' + self.bridge_id + '/' +
                             uuid.uuid4().hex)
        timeout = 0.1
        while True:
            try:
                api_client = APIClient(host_url=self.api_host,
                                       user_agent=client_user_agent,
                                       api_version=self.api_version,
                                       resource='RESOURCE',
                                       key=self.api_key)
                self.api_clients_queue.put({
                    'client': api_client,
                    'request_interval': 0
                })
                logger.info('Started api_client {}'.format(
                    api_client.session.headers['User-Agent']))
                break
            except RequestFailed as e:
                self.log_dict['exceptions_count'] += 1
                logger.error(
                    'Failed start api_client with status code {}'.format(
                        e.status_code))
                timeout = timeout * 2
                sleep(timeout)

    def fill_api_clients_queue(self):
        while self.api_clients_queue.qsize() == 0:
            self.create_api_client()

    def fill_resource_items_queue(self, resource):
        start_time = datetime.now(TZ)
        rows = self.db.iterview(self.resources[resource]['view_path'],
                                10**3,
                                include_docs=True)
        filter_func = partial(self.resources[resource]['filter'],
                              time=start_time)
        for row in ifilter(filter_func, rows):
            self.resource_items_queue.put({'id': row.id, 'resource': resource})
            self.log_dict['add_to_resource_items_queue'] += 1

    def queues_controller(self):
        while True:
            self.fill_api_clients_queue()
            #if self.workers_pool.free_count() > 0 and (self.resource_items_queue.qsize() > int((self.resource_items_queue_size / 100) * self.workers_inc_threshold)):
            if (self.resource_items_queue.qsize() > 0
                    and self.workers_pool.free_count() > 0):
                w = ArchiveWorker.spawn(self.api_clients_queue,
                                        self.resource_items_queue, self.db,
                                        self.archive_db, self.archive_db2,
                                        self.workers_config,
                                        self.retry_resource_items_queue,
                                        self.log_dict)
                self.workers_pool.add(w)
                logger.info('Queue controller: Create main queue worker.')
            #elif self.resource_items_queue.qsize() < int((self.resource_items_queue_size / 100) * self.workers_dec_threshold):
            elif self.resource_items_queue.qsize() == 0:
                if len(self.workers_pool) > self.workers_min:
                    wi = self.workers_pool.greenlets.pop()
                    wi.shutdown()
                    logger.info('Queue controller: Kill main queue worker.')
            logger.info('Main resource items queue contains {} items'.format(
                self.resource_items_queue.qsize()))
            logger.info('Retry resource items queue contains {} items'.format(
                self.retry_resource_items_queue.qsize()))
            logger.info(
                'Status: add to queue - {add_to_resource_items_queue}, add to retry - {add_to_retry}, moved to public archive - {moved_to_public_archive}, dumped to secret archive - {dumped_to_secret_archive}, archived - {archived}, exceptions - {exceptions_count}, not found - {not_found_count}'
                .format(**self.log_dict))
            sleep(self.queues_controller_timeout)

    def gevent_watcher(self):
        self.fill_api_clients_queue()
        if not self.resource_items_queue.empty() and len(
                self.workers_pool) < self.workers_min:
            w = ArchiveWorker.spawn(self.api_clients_queue,
                                    self.resource_items_queue, self.db,
                                    self.archive_db, self.archive_db2,
                                    self.workers_config,
                                    self.retry_resource_items_queue,
                                    self.log_dict)
            self.workers_pool.add(w)
            logger.info('Watcher: Create main queue worker.')
        if not self.retry_resource_items_queue.empty() and len(
                self.retry_workers_pool) < self.retry_workers_min:
            w = ArchiveWorker.spawn(self.api_clients_queue,
                                    self.retry_resource_items_queue, self.db,
                                    self.archive_db, self.archive_db2,
                                    self.workers_config,
                                    self.retry_resource_items_queue,
                                    self.log_dict)
            self.retry_workers_pool.add(w)
            logger.info('Watcher: Create retry queue worker.')

    def run(self):
        logger.info('Start Archivarius Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        for resource in self.resources:
            self.filter_workers_pool.spawn(self.fill_resource_items_queue,
                                           resource=resource)
        spawn(self.queues_controller)
        while True:
            self.gevent_watcher()
            if len(self.filter_workers_pool) == 0 and len(
                    self.workers_pool) == 0 and len(
                        self.retry_workers_pool) == 0:
                break
            sleep(self.watch_interval)

    def config_get(self, name):
        try:
            return self.config.get('main', name)
        except NoOptionError:
            return
Beispiel #57
0
class MeekSession(RelaySession):
    conn_pool = HTTPClientPool()

    def __init__(self, socksconn, meek, timeout):
        super(MeekSession, self).__init__(socksconn)
        self.sessionid = session_id()
        self.meek = meek
        self.meektimeout = timeout
        self.relay = self.meek.select_relay()
        self.ca_certs = self.meek.ca_certs

        self.httpclient = self.conn_pool.get(self.relay, self.ca_certs,
                                             self.meektimeout)

        self.udpsock = None
        self.allsocks = [self.socksconn]

        self.l2m_queue = Queue()
        self.m2l_queue = Queue()
        self.m_notifier = Event()
        self.l_notifier = Event()
        self.finish = Event()
        self.m_notifier.clear()
        self.l_notifier.clear()
        self.finish.clear()
        self.timer = SharedTimer(self.meektimeout)

    def _stream_response(self, response):
        try:
            chunk = response.read(MAX_PAYLOAD_LENGTH)
            while chunk:
                log.debug("%s streaming DOWN %d bytes" %
                          (self.sessionid, len(chunk)))
                yield chunk, ""
                chunk = response.read(MAX_PAYLOAD_LENGTH)
        except GeneratorExit:
            response.release()
            raise  # re-raise GeneratorExit; raising StopIteration in a generator is an error under PEP 479

    def meek_response(self, response, stream):
        if stream:
            return self._stream_response(response)
        data = response.read()
        response.release()
        if not data:
            return [("", "")]
        if not self.udpsock:
            return [(data, "")]

        # parse UDP packets
        log.debug("%s DOWN %d bytes" % (self.sessionid, len(data)))
        lengths = get_meek_meta(response.headers, HEADER_UDP_PKTS).split(",")
        pos = 0
        pkts = []
        for length in lengths:
            nxt = pos + int(length)
            pkts.append((data[pos:nxt], ""))
            pos = nxt
        return pkts

    def meek_roundtrip(self, pkts):
        headers = {
            HEADER_SESSION_ID: self.sessionid,
            HEADER_MSGTYPE: MSGTYPE_DATA,
            'Host': self.relay.hostname,
            'Content-Type': "application/octet-stream",
            'Connection': "Keep-Alive",
        }
        stream = False
        if not self.udpsock and "stream" in self.relay.properties:
            stream = True
            headers[HEADER_MODE] = MODE_STREAM

        if pkts and self.udpsock:
            lengths = str(",".join([str(len(p)) for p in pkts]))
            headers[HEADER_UDP_PKTS] = lengths

        data = "".join(pkts)
        headers['Content-Length'] = str(len(data))
        for _ in range(CLIENT_MAX_TRIES):
            try:
                log.debug("%s UP %d bytes" % (self.sessionid, len(data)))
                resp = self.httpclient.post("/", body=data, headers=headers)
                if resp.status_code != 200:
                    # The meek server always gives 200, so any non-200 means an external issue.
                    continue
                err = get_meek_meta(resp.headers, HEADER_ERROR)
                if err:
                    return [("", err)]
                else:

                    try:
                        return self.meek_response(resp, stream)
                    except Exception as ex:
                        log.error(
                            "[Exception][meek_roundtrip - meek_response]: %s" %
                            str(ex))
                        resp.release()
                        return [("", "Data Format Error")]
            except socket.timeout:  # @UndefinedVariable
                return [("", "timeout")]
            except Exception as ex:
                log.error("[Exception][meek_roundtrip]: %s" % str(ex))
                gevent.sleep(CLIENT_RETRY_DELAY)
        self.relay.failure += 1
        return [("", "Max Retry (%d) Exceeded" % CLIENT_MAX_TRIES)]

    def meek_sendrecv(self):
        pkts = []
        datalen = 0
        while not self.l2m_queue.empty():
            pkt = self.l2m_queue.get()
            pkts.append(pkt)
            datalen += len(pkt)
            if datalen >= MAX_PAYLOAD_LENGTH:
                for (resp, err) in self.meek_roundtrip(pkts):
                    yield (resp, err)
                    if err or not resp:
                        return

                pkts = []
                datalen = 0
        for (resp, err) in self.meek_roundtrip(pkts):
            yield (resp, err)
            if err or not resp:
                return

    def meek_relay(self):
        for (resp, err) in self.meek_sendrecv():
            if err:
                return err
            if resp:
                self.m2l_queue.put(resp)
                self.l_notifier.set()
        return ""

    def meek_relay_thread(self):
        interval = CLIENT_INITIAL_POLL_INTERVAL
        while not self.finish.is_set():
            try:
                hasdata = self.m_notifier.wait(timeout=interval)
                self.m_notifier.clear()
                err = self.meek_relay()
                if err:
                    break
                if not hasdata:
                    interval *= CLIENT_POLL_INTERVAL_MULTIPLIER
                    if interval > CLIENT_MAX_POLL_INTERVAL:
                        interval = CLIENT_MAX_POLL_INTERVAL
            except Exception as ex:
                log.error("[Exception][meek_relay_thread]: %s" % str(ex))
                break
        self.finish.set()

    def write_to_client(self, data):
        if self.udpsock:
            self.udpsock.sendto(data, self.last_clientaddr)
        else:
            self.socksconn.sendall(data)

    def meek_write_to_client_thread(self):
        while not self.finish.is_set():
            try:
                hasdata = self.l_notifier.wait(
                    timeout=CLIENT_MAX_POLL_INTERVAL)
                self.l_notifier.clear()
                if not hasdata:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                else:
                    self.timer.reset()
                    while not self.m2l_queue.empty():
                        data = self.m2l_queue.get()
                        if data:
                            self.write_to_client(data)
            except Exception as ex:
                log.error("[Exception][meek_write_to_client_thread]: %s" %
                          str(ex))
                break
        self.finish.set()

    def read_from_client(self, timeout):
        readable, _, _ = select.select(self.allsocks, [], [], timeout)
        if not readable:
            return None
        if self.socksconn in readable:
            if self.udpsock:
                raise RelaySessionError(
                    "unexcepted read-event from tcp socket in UDP session")
            data = self.socksconn.recv(MAX_PAYLOAD_LENGTH)
            if not data:
                raise RelaySessionError("peer closed")
            return data
        if self.udpsock and self.udpsock in readable:
            data, addr = self.udpsock.recvfrom(MAX_PAYLOAD_LENGTH)
            if not self.valid_udp_client(addr):
                return None
            else:
                self.last_clientaddr = addr
                return data

    def meek_read_from_client_thread(self):
        while not self.finish.is_set():
            try:
                data = self.read_from_client(CLIENT_MAX_POLL_INTERVAL)
                if not data:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                else:
                    self.timer.reset()
                    self.l2m_queue.put(data)
                    self.m_notifier.set()
            except Exception as ex:
                log.error("[Exception][meek_read_from_client_thread]: %s" %
                          str(ex))
                break
        self.finish.set()

    def proc_tcp_request(self, req):
        self.l2m_queue.put(req.pack())

    def relay_tcp(self):
        read_thread = gevent.spawn(self.meek_read_from_client_thread)
        write_thread = gevent.spawn(self.meek_write_to_client_thread)
        relay_thread = gevent.spawn(self.meek_relay_thread)
        # notify relay to send request
        self.m_notifier.set()
        [t.join() for t in (read_thread, write_thread, relay_thread)]
        log.info("Session %s Ended" % self.sessionid)

    def valid_udp_client(self, addr):
        if self.client_associate[0] == "0.0.0.0" or \
                self.client_associate[0] == "::":
            return True
        if self.client_associate == addr:
            return True
        return False

    def cmd_udp_associate(self, req):
        self.client_associate = (req.dstaddr, req.dstport)
        self.last_clientaddr = self.client_associate
        for (resp, err) in self.meek_roundtrip([req.pack()]):
            if err:
                return
            if resp:
                Reply(resp)

        self.udpsock = bind_local_udp(self.socksconn)
        if not self.udpsock:
            request_fail(self.socksconn, req, GENERAL_SOCKS_SERVER_FAILURE)
            return
        self.track_sock(self.udpsock)

        read_thread = gevent.spawn(self.meek_read_from_client_thread)
        write_thread = gevent.spawn(self.meek_write_to_client_thread)
        relay_thread = gevent.spawn(self.meek_relay_thread)

        request_success(self.socksconn, *sock_addr_info(self.udpsock))
        [t.join() for t in (read_thread, write_thread, relay_thread)]
        log.info("Session %s Ended" % self.sessionid)

    def meek_terminate(self):
        headers = {
            HEADER_SESSION_ID: self.sessionid,
            HEADER_MSGTYPE: MSGTYPE_TERMINATE,
            #'Content-Type':     "application/octet-stream",
            'Content-Length': "0",
            'Connection': "Keep-Alive",
            'Host': self.relay.hostname,
        }
        try:
            self.httpclient.post("/", data="", headers=headers)
        except:
            pass

    def clean(self):
        self.meek_terminate()
        for sock in self.allsocks:
            sock.close()
        #self.httpclient.close()
        self.conn_pool.release(self.relay, self.httpclient)
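The relay loop in `meek_relay_thread()` polls the server with an interval that grows geometrically while the client is idle, capped at a maximum. A stripped-down sketch of that backoff, with illustrative default values in place of the CLIENT_* constants:

from gevent.event import Event

def poll_loop(notifier, stop, do_roundtrip,
              initial=0.1, multiplier=1.5, maximum=5.0):
    # Wait on `notifier` (a gevent Event set when the client queues data).
    # If nothing arrived before the timeout, still do a roundtrip so the
    # server side can push data down, but stretch the next wait.
    interval = initial
    while not stop.is_set():
        has_data = notifier.wait(timeout=interval)
        notifier.clear()
        do_roundtrip()
        if not has_data:
            interval = min(interval * multiplier, maximum)
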
Beispiel #58
0
class TreeWatcher(object):
    """A watcher will subscribe events from a tree holder and turn them into
    iterator.

    :param tree_hub: A :class:`TreeHub` instance.
    :param from_application_name: The name of caller application.
    :param with_initial: ``True`` if you want to dump whole tree as the first
                         element of iterator.
    :param life_span: The life span in seconds of this session.
    """

    MESSAGE_TYPES = {
        TreeEvent.NODE_ADDED: 'update',
        TreeEvent.NODE_UPDATED: 'update',
        TreeEvent.NODE_REMOVED: 'delete'
    }

    TYPE_NAMES = (SERVICE_SUBDOMAIN, SWITCH_SUBDOMAIN, CONFIG_SUBDOMAIN,
                  EXTRA_SUBDOMAIN_SERVICE_INFO)

    PATH_LEVEL_TYPE = 1  # /huskar/{type}
    PATH_LEVEL_APPLICATION = 2  # /huskar/{type}/{application}
    PATH_LEVEL_CLUSTER = 3  # /huskar/{type}/{application}/{cluster}
    PATH_LEVEL_INSTANCE = 4  # /huskar/{type}/{application}/{cluster}/{id}

    def __init__(self,
                 tree_hub,
                 from_application_name=None,
                 from_cluster_name=None,
                 with_initial=False,
                 life_span=None,
                 metrics_tag_from=None):
        self.hub = tree_hub

        # The optional route context
        self.from_application_name = from_application_name
        self.from_cluster_name = from_cluster_name

        self.with_initial = with_initial
        self.queue = Queue()
        self.holders = set()
        self.cluster_maps = collections.defaultdict(ClusterMap)
        self.cluster_whitelist = collections.defaultdict(set)
        self.watch_map = collections.defaultdict(set)
        self.life_span = life_span
        self._metrics_tag_from = metrics_tag_from

    def __iter__(self):
        """The tree watcher is iterable for subscribing events."""
        monitor_client.increment('tree_watcher.session',
                                 1,
                                 tags={
                                     'from': str(self._metrics_tag_from),
                                     'appid': str(self._metrics_tag_from),
                                 })
        started_at = time.time()
        if self.with_initial:
            body = self._load_entire_body()
            yield ('all', body)
            monitor_client.increment('tree_watcher.event',
                                     1,
                                     tags={
                                         'from': str(self._metrics_tag_from),
                                         'appid': str(self._metrics_tag_from),
                                         'event_type': 'all',
                                     })
        while True:
            while not self.queue.empty():
                event_type, body = self.queue.get()
                yield (event_type, body)
                monitor_client.increment('tree_watcher.event',
                                         1,
                                         tags={
                                             'from': str(self._metrics_tag_from),
                                             'appid': str(self._metrics_tag_from),
                                             'event_type': event_type,
                                         })
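            # Heartbeat: emit a ping on every idle pass so long-polling
            # consumers can tell the stream is still alive.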
            yield ('ping', {})
            if self.life_span and time.time() > started_at + self.life_span:
                break
            sleep(1)

    def watch(self, application_name, type_name):
        """Watches a new subtree.

        :param application_name: The appid of the subtree (e.g. ``base.foo``).
        :param type_name: The type of the subtree (e.g. ``service``).
        """
        with self.maintain_watch_map(application_name, type_name) as type_name:
            holder = self.hub.get_tree_holder(application_name, type_name)
            if holder in self.holders:
                return
            try:
                holder.block_until_initialized(
                    timeout=settings.ZK_SETTINGS['treewatch_timeout'])
            except TreeTimeoutError:
                self.hub.release_tree_holder(application_name, type_name)
                raise

        cluster_map = self.cluster_maps[application_name, type_name]
        cluster_routes = holder.list_cluster_routes(self.from_application_name,
                                                    self.from_cluster_name)
        for cluster_name, resolved_name in cluster_routes:
            cluster_map.register(cluster_name, resolved_name)

        self.holders.add(holder)
        holder.tree_changed.connect(self.handle_event, sender=holder)

    @contextlib.contextmanager
    def maintain_watch_map(self, application_name, type_name):
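        # Record the watched application up front; if resolving the subtree
        # fails, the registration is rolled back so failed watches leave no
        # stale entries behind.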
        subdomain = subdomain_map[type_name]
        self.watch_map[subdomain.name].add(application_name)
        try:
            yield subdomain.basic_name
        except Exception:
            self.watch_map[subdomain.name].discard(application_name)
            raise

    def limit_cluster_name(self, application_name, type_name, cluster_name):
        """Adds a whitelist item to limit events by cluster name.

        :param application_name: The appid of the subtree (e.g. ``base.foo``).
        :param type_name: The type of the subtree (e.g. ``service``).
        :param cluster_name: Only clusters added to this whitelist will be
                             shown in events.
        """
        self.cluster_whitelist[application_name, type_name].add(cluster_name)

    def handle_event(self, sender, event):
        path = parse_path(self.hub.base_path, event.event_data.path)
        path_level = path.get_level()

        if path.is_none() or path_level == self.PATH_LEVEL_TYPE:
            logger.warning('Unexpected path: %r', event)
            return

        # We should notify for changes of cluster route.
        if path_level in (self.PATH_LEVEL_APPLICATION,
                          self.PATH_LEVEL_CLUSTER):

            # Publish message if and only if node is modified
            if (event.event_type == TreeEvent.NODE_ADDED
                    and not event.event_data.data):
                return

            cluster_map = self.cluster_maps[path.application_name,
                                            path.type_name]
            last_cluster_names = dict(cluster_map.cluster_names)
            self._update_cluster_route(path, event)

            # Publish message if and only if the callee cluster is watched
            if self._has_cluster_route_changed(path, last_cluster_names):
                # Dump all data for symlink or route changing
                entire_body = self._load_entire_body()
                message = ('all', entire_body)
                self.queue.put(message)
            else:
                # Dump updated data for watched extra types
                body = self.handle_event_for_extra_type('update', path)
                if body:
                    message = ('update', body)
                    self.queue.put(message)

        # We should notify for changes of instance node.
        if path_level == self.PATH_LEVEL_INSTANCE:
            data = event.event_data.data
            event_type = event.event_type
            if event_type == TreeEvent.NODE_REMOVED:
                data = None
            entire_body = self._dump_body([(path, data)])
            if entire_body:
                message = (self.MESSAGE_TYPES[event_type], entire_body)
                self.queue.put(message)
            return

    def _load_entire_body(self):
        entire_body = self._dump_body(self._iter_instance_nodes())
        extra_types_data = self.handle_all_for_extra_type()
        entire_body.update(extra_types_data)
        entire_body = self._fill_body(entire_body)
        return entire_body

    def _update_cluster_route(self, path, event):
        # symlink or route changed only at service scope
        path_level = path.get_level()
        cluster_map = self.cluster_maps[path.application_name, path.type_name]
        holder = self.hub.get_tree_holder(path.application_name,
                                          path.type_name)
        force_route_cluster_name = self.from_cluster_name \
            if path.type_name == SERVICE_SUBDOMAIN else None

        # Update cluster map for route
        if self.from_application_name and self.from_cluster_name:
            # NOTE It is not possible to know whether the changed cluster
            # is an intermediate node in a [route -> symlink -> physical]
            # chain, so we must re-resolve every intent whichever cluster
            # changed.
            for intent in settings.ROUTE_INTENT_LIST:
                resolved_name = holder.cluster_resolver.resolve(
                    self.from_cluster_name,
                    self.from_application_name,
                    intent,
                    force_route_cluster_name=force_route_cluster_name)
                if resolved_name is None:
                    resolved_name = self.from_cluster_name
                cluster_map.deregister(intent)
                cluster_map.register(intent, resolved_name)

        # Update cluster map for symlink
        if path_level == self.PATH_LEVEL_CLUSTER:
            resolved_name = holder.cluster_resolver.resolve(
                path.cluster_name,
                force_route_cluster_name=force_route_cluster_name)
            cluster_map.deregister(path.cluster_name)
            cluster_map.register(path.cluster_name, resolved_name)

    def _has_cluster_route_changed(self, path, last_cluster_names):
        cluster_map = self.cluster_maps[path.application_name, path.type_name]
        cluster_whitelist = self.cluster_whitelist[path.application_name,
                                                   path.type_name]

        # Compare the difference of cluster names
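        # The symmetric difference of the (cluster, resolved) pairs keeps
        # every mapping that changed; dict()/set() then reduce those pairs
        # to the affected cluster names.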
        cluster_difference = set(
            dict(
                set(last_cluster_names.items())
                ^ set(cluster_map.cluster_names.items())))
        if cluster_difference:
            return not cluster_whitelist or bool(
                cluster_whitelist.intersection(cluster_difference))
        return False

    def handle_all_for_extra_type(self):
        body = {}
        for type_name in subdomain_map.BASIC_SUBDOMAINS:
            path = Path.make(type_name=type_name)
            type_body = self.handle_event_for_extra_type('all', path)
            body.update(type_body)
        return body

    def handle_event_for_extra_type(self, event_type, path):
        body = {}
        if not switch.is_switched_on(SWITCH_ENABLE_META_MESSAGE_CANARY):
            return body
        extra_types = subdomain_map.get_extra_types(path.type_name)
        for extra_type in extra_types:
            type_data = body.setdefault(extra_type, {})
            application_names = set()
            if path.application_name:
                if path.application_name in self.watch_map[extra_type]:
                    application_names.add(path.application_name)
            else:
                application_names = self.watch_map[extra_type]
            for application_name in application_names:
                app_data = type_data.setdefault(application_name, {})
                handler = extra_handlers[extra_type, event_type]
                data = handler(
                    self,
                    Path.make(path.type_name, application_name,
                              path.cluster_name, path.data_name))
                if data:
                    app_data.update(data)
        return {
            type_name: type_data
            for type_name, type_data in body.items() if any(type_data.values())
        }

    def _iter_instance_nodes(self):
        for holder in self.holders:
            if holder.type_name in self.watch_map:
                for path, data in holder.list_instance_nodes():
                    yield path, data

    def _dump_body(self, pairs):
        entire_body = {}
        for path, data in pairs:
            cluster_map = self.cluster_maps[path.application_name,
                                            path.type_name]
            cluster_whitelist = self.cluster_whitelist[path.application_name,
                                                       path.type_name]
            cluster_names = set([path.cluster_name]).union(
                cluster_map.resolved_names[path.cluster_name])

            # We should ignore the subtree of the callee cluster because it
            # has been overridden by a symlink or route.
            if cluster_map.cluster_names.get(path.cluster_name):
                continue

            if cluster_whitelist:
                cluster_names = cluster_names & cluster_whitelist

            for cluster_name in cluster_names:
                data_body = entire_body \
                    .setdefault(path.type_name, {}) \
                    .setdefault(path.application_name, {}) \
                    .setdefault(cluster_name, {}) \
                    .setdefault(decode_key(path.data_name), {})
                data_body['value'] = data
        return entire_body

    def _fill_body(self, body):
        # Fills the type names and application names
        for type_name in self.TYPE_NAMES:
            type_data = body.setdefault(type_name, {})
            application_names = self.watch_map.get(type_name, [])
            for application_name in application_names:
                type_data.setdefault(application_name, {})
        # Fills the cluster names
        for (application_name, type_name), cluster_names \
                in self.cluster_whitelist.iteritems():
            for cluster_name in cluster_names:
                body.setdefault(type_name, {}) \
                    .setdefault(application_name, {}) \
                    .setdefault(cluster_name, {})
        # Checks extra information
        self._detect_bad_route(body)
        return body

    def _detect_bad_route(self, body):
        if not switch.is_switched_on(SWITCH_DETECT_BAD_ROUTE):
            return
        if self.from_application_name in settings.LEGACY_APPLICATION_LIST:
            return
        from_cluster_blacklist = settings.ROUTE_FROM_CLUSTER_BLACKLIST.get(
            self.from_application_name, [])
        if self.from_cluster_name in from_cluster_blacklist:
            return

        type_name = SERVICE_SUBDOMAIN
        type_body = body[type_name]

        flat_cluster_names = (
            (application_name, cluster_name, cluster_body)
            for application_name, application_body in type_body.iteritems()
            for cluster_name, cluster_body in application_body.iteritems())

        for application_name, cluster_name, cluster_body in flat_cluster_names:
            if application_name in settings.LEGACY_APPLICATION_LIST:
                continue
            if cluster_name in settings.ROUTE_DEST_CLUSTER_BLACKLIST.get(
                    application_name, []):
                continue

            cluster_map = self.cluster_maps[application_name, type_name]
            resolved_name = cluster_map.cluster_names.get(cluster_name)
            if cluster_body or not resolved_name:
                continue
            monitor_client.increment(
                'tree_watcher.bad_route',
                1,
                tags=dict(
                    from_application_name=self.from_application_name,
                    from_cluster_name=self.from_cluster_name,
                    dest_application_name=application_name,
                    appid=application_name,
                    dest_cluster_name=cluster_name,
                    dest_resolved_cluster_name=resolved_name,
                ))
            logger.info('Bad route detected: %s %s %s %s -> %s (%r)',
                        self.from_application_name, self.from_cluster_name,
                        application_name, cluster_name, resolved_name,
                        dict(cluster_map.cluster_names))
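The watcher above is consumed purely by iterating it: every item is an (event_type, body) pair, interleaved with ('ping', {}) heartbeats. Below is a minimal consumer sketch; fake_events and the SSE-style framing are illustrative stand-ins, not part of the example above.

import json
import sys


def fake_events():
    # Stand-in for iterating a TreeWatcher: one full dump, one update, a ping.
    yield ('all', {'service': {'base.foo': {'stable': {}}}})
    yield ('update',
           {'service': {'base.foo': {'stable': {'key': {'value': '1'}}}}})
    yield ('ping', {})


def stream(events):
    # Render each (event_type, body) pair as a server-sent-events frame.
    for event_type, body in events:
        yield 'event: %s\ndata: %s\n\n' % (event_type, json.dumps(body))


if __name__ == '__main__':
    for frame in stream(fake_events()):
        sys.stdout.write(frame)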
Beispiel #59
0
class Server(object):
    def __init__(self, address, size=None, log_level=DEFAULT_LOG_LEVEL):
        self.daemon = True
        self.started = False
        self.size = size
        self.queue = Queue(maxsize=size)
        self.address = address

        self.context = zmq.Context(1)
        self.server = None
        self.logger = get_logger(self, log_level)

        self._has_fetched_jobs = False

    def send(self, cmd, data=''):
        self.server.send_multipart([cmd, data])

    def recv(self):
        reply = self.server.recv_multipart()

        assert len(reply) == 2

        return reply

    def bind(self):
        if self.server:
            self.server.close()

        self.server = self.context.socket(zmq.REP)
        self.server.bind(self.address)

    def start(self):
        self.started = True

        self.logger.info("Taskmaster binding to %r", self.address)
        self.bind()

        while self.started:
            gevent.sleep(0)
            cmd, data = self.recv()
            if cmd == 'GET':
                if not self.has_work():
                    self.send('QUIT')
                    continue

                try:
                    job = self.queue.get_nowait()
                except Empty:
                    self.send('WAIT')
                    continue

                self.send('OK', pickle.dumps(job))

            elif cmd == 'DONE':
                self.queue.task_done()
                if self.has_work():
                    self.send('OK')
                else:
                    self.send('QUIT')

            else:
                self.send('ERROR', 'Unrecognized command')

        self.logger.info('Shutting down')
        self.shutdown()

    def mark_queue_filled(self):
        self._has_fetched_jobs = True

    def put_job(self, job):
        return self.queue.put(job)

    def first_job(self):
        return self.queue.queue[0]

    def get_current_size(self):
        return self.queue.qsize()

    def get_max_size(self):
        return self.size

    def has_work(self):
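        # Until the producer signals that the queue has been filled, report
        # work as pending so idle workers are told to WAIT instead of QUIT.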
        if not self._has_fetched_jobs:
            return True
        return not self.queue.empty()

    def is_alive(self):
        return self.started

    def shutdown(self):
        if not self.started:
            return
        self.server.close()
        self.context.term()
        self.started = False
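The server above speaks a small REQ/REP job protocol: a worker sends GET and receives OK (with a pickled job), WAIT, or QUIT, and acknowledges finished work with DONE. Below is a worker-side sketch under those assumptions; run_worker, the address, and handle_job are illustrative and not part of the example above.

import pickle
import time

import zmq


def run_worker(address, handle_job):
    # Worker side of the GET/OK/WAIT/QUIT/DONE protocol in Server.start().
    context = zmq.Context(1)
    socket = context.socket(zmq.REQ)
    socket.connect(address)
    try:
        while True:
            socket.send_multipart([b'GET', b''])
            cmd, data = socket.recv_multipart()
            if cmd == b'QUIT':
                break
            if cmd == b'WAIT':
                time.sleep(0.1)
                continue
            # cmd == b'OK': the payload is a pickled job.
            handle_job(pickle.loads(data))
            socket.send_multipart([b'DONE', b''])
            cmd, _ = socket.recv_multipart()
            if cmd == b'QUIT':
                break
    finally:
        socket.close()
        context.term()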
Beispiel #60
0
class CeleryReporter(Greenlet):
    one_min_stats = []
    one_sec_stats = ['queued']

    def _set_config(self, **config):
        self.config = dict(celery_task_prefix='simplecoin.tasks',
                           celery={'CELERY_DEFAULT_QUEUE': 'celery'},
                           report_pool_stats=True,
                           share_batch_interval=60,
                           tracker_expiry_time=180)
        self.config.update(config)

        # check that a celery task prefix has been configured
        if not self.config['celery_task_prefix']:
            self.logger.error("You need to specify a celery prefix")
            exit()

    def __init__(self, server, **config):
        Greenlet.__init__(self)
        self.logger = server.register_logger('reporter')
        self._set_config(**config)

        # setup our celery agent and monkey patch
        self.celery = Celery()
        self.celery.conf.update(self.config['celery'])

        self.share_reporter = None

        self.server = server
        self.server.register_stat_counters(self.one_min_stats,
                                           self.one_sec_stats)

        self.queue = Queue()
        self.addresses = {}
        self.workers = {}

    @property
    def status(self):
        dct = dict(queue_size=self.queue.qsize(),
                   addresses_count=len(self.addresses),
                   workers_count=len(self.workers))
        dct.update({
            key: self.server[key].summary()
            for key in self.one_min_stats + self.one_sec_stats
        })
        return dct

    # Remote methods to send information to other servers
    ########################
    def add_one_minute(self, *args, **kwargs):
        self.server['queued'].incr()
        self.queue.put(("add_one_minute", args, kwargs))
        self.logger.info("Calling celery task {} with {}".format(
            "add_one_minute", args))

    def add_share(self, *args, **kwargs):
        self.server['queued'].incr()
        self.queue.put(("add_share", args, kwargs))
        self.logger.info("Calling celery task {} with {}".format(
            "add_shares", args))

    def agent_send(self, *args, **kwargs):
        self.server['queued'].incr()
        self.queue.put(("agent_receive", args, kwargs))

    def add_block(self, *args, **kwargs):
        self.server['queued'].incr()
        self.queue.put(("add_block", args, kwargs))
        self.logger.info("Calling celery task {} with {}".format(
            "transmit_block", args))

    def _run(self):
        self.share_reporter = spawn(self.report_loop)
        while True:
            self._queue_proc()

    def _queue_proc(self):
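        # Peek instead of get: the item is only dequeued after send_task
        # succeeds, so a broker failure leaves it in place to be retried.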
        name, args, kwargs = self.queue.peek()
        try:
            self.celery.send_task(
                self.config['celery_task_prefix'] + '.' + name, args, kwargs)
        except Exception as e:
            self.logger.error(
                "Unable to communicate with celery broker! {}".format(e))
        else:
            self.queue.get()

    def report_loop(self):
        """ Repeatedly do our share reporting on an interval """
        while True:
            sleep(self.config['share_batch_interval'])
            try:
                self._report_shares()
            except Exception:
                self.logger.error("Unhandled error in report shares",
                                  exc_info=True)

    def _report_shares(self, flush=False):
        """ Goes through our internal aggregated share data structures and
        reports them to our external storage. If asked to flush it will report
        all one minute shares, otherwise it will only report minutes that have
        passed. """
        if flush:
            self.logger.info("Flushing all aggreated share data...")

        self.logger.info("Reporting shares for {:,} users".format(
            len(self.addresses)))
        t = time.time()
        for address, tracker in self.addresses.items():
            tracker.report()
            # if the last log time was more than expiry time ago...
            if (tracker.last_log + self.config['tracker_expiry_time']) < t:
                assert tracker.unreported == 0
                del self.addresses[address]

        self.logger.info("Shares reported (queued) in {}".format(
            time_format(time.time() - t)))
        self.logger.info(
            "Reporting one minute shares for {:,} address/workers".format(
                len(self.workers)))
        t = time.time()
        if flush:
            upper = t + 10
        else:
            upper = (t // 60) * 60
        for worker_addr, tracker in self.workers.items():
            tracker.report(upper)
            # if the last log time was more than expiry time ago...
            if (tracker.last_log + self.config['tracker_expiry_time']) < t:
                assert sum(tracker.slices.itervalues()) == 0
                del self.workers[worker_addr]
        self.logger.info("One minute shares reported (queued) in {}".format(
            time_format(time.time() - t)))

    def log_share(self, address, worker, amount, typ):
        """ Logs a share for a user and user/worker into all three share
        aggregate sources. """
        # log the share for the pool cache total as well
        if address != "pool" and self.config['report_pool_stats']:
            self.log_share("pool", '', amount, typ)

        # collecting for reporting to the website for display in graphs
        addr_worker = (address, worker)
        if addr_worker not in self.workers:
            self.workers[addr_worker] = WorkerTracker(self, address, worker)
        self.workers[(address, worker)].count_share(amount, typ)

        # reporting for payout share logging and vardiff rates
        if typ == StratumClient.VALID_SHARE and address != "pool":
            if address not in self.addresses:
                self.addresses[address] = AddressTracker(self, address)
            # for tracking vardiff speeds
            self.addresses[address].count_share(amount)

    def kill(self, *args, **kwargs):
        self.share_reporter.kill(*args, **kwargs)
        self._report_shares(flush=True)
        self.logger.info("Flushing the reporter task queue, {} items blocking "
                         "exit".format(self.queue.qsize()))
        while not self.queue.empty():
            self._queue_proc()
        self.logger.info("Shutting down CeleryReporter..")
        Greenlet.kill(self, *args, **kwargs)
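The peek-then-get pattern in _queue_proc gives at-least-once delivery to the broker: an item is removed only after a successful send, and kill() drains whatever is still queued before exiting. Below is a standalone sketch of that drain loop, assuming gevent's Queue; drain and flaky_send are illustrative names.

from gevent.queue import Queue


def drain(queue, send):
    # Retry the head item until send() succeeds, then drop it; a failure
    # leaves the item queued for the next attempt.
    while not queue.empty():
        item = queue.peek()
        try:
            send(item)
        except Exception:
            continue
        queue.get()


if __name__ == '__main__':
    queue = Queue()
    for task in ('add_share', 'add_block', 'agent_receive'):
        queue.put(task)

    attempts = []

    def flaky_send(item):
        # Fail once to exercise the retry path.
        attempts.append(item)
        if len(attempts) == 1:
            raise RuntimeError('broker unavailable')

    drain(queue, flaky_send)
    assert queue.empty()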