Exemple #1
0
    def test_context_manager_timeout(self):
        """A timed acquire used as a context manager releases on exit."""
        semaphore = locks.Semaphore()
        wait_limit = timedelta(seconds=0.01)
        with (yield semaphore.acquire(wait_limit)):
            pass

        # Leaving the block released the semaphore, so a fresh acquire
        # completes immediately.
        reacquired = semaphore.acquire()
        self.assertTrue(reacquired.done())
Exemple #2
0
    def test_context_manager(self):
        """The acquired context manager binds None and releases on exit."""
        semaphore = locks.Semaphore()
        releaser = yield semaphore.acquire()
        with releaser as bound:
            self.assertTrue(bound is None)

        # The slot was handed back when the block ended, so acquiring
        # again finishes right away.
        reacquired = semaphore.acquire()
        self.assertTrue(reacquired.done())
Exemple #3
0
    def test_context_manager_exception(self):
        """An exception raised inside the ``with`` body still releases."""
        semaphore = locks.Semaphore()
        with self.assertRaises(ZeroDivisionError):
            releaser = yield semaphore.acquire()
            with releaser:
                1 / 0

        # Despite the error, the semaphore is free to be acquired again.
        reacquired = semaphore.acquire()
        self.assertTrue(reacquired.done())
Exemple #4
0
    def test_context_manager_timeout_error(self):
        """A timed-out acquire raises and leaves the counter untouched."""
        semaphore = locks.Semaphore(value=0)
        wait_limit = timedelta(seconds=0.01)
        with self.assertRaises(gen.TimeoutError):
            with (yield semaphore.acquire(wait_limit)):
                pass

        # The counter is still 0, so another acquire has to wait.
        pending = semaphore.acquire()
        self.assertFalse(pending.done())
    def _process(self):
        """Crawl from ``self._base_fetch_url`` and collect offer links.

        Starts ``self._concurrency`` worker coroutines that pull URLs from a
        shared queue and fetch their links via ``self.get_links_from_url``.
        Links matching ``self._offer_pattern`` that start with
        ``self._base_url`` are remembered; links recognised as further
        listing pages are put back on the queue.  When the queue has been
        drained, the collected offer links are stored as a list in
        ``self._links``.

        This is a generator-style coroutine body: it yields Tornado futures.
        The queue join is given a 300 second timeout; per the Tornado
        documentation an expired join raises a timeout error, which is not
        handled here.
        """
        q = queues.Queue()
        start = time.time()
        fetching, fetched, collection = set(), set(), set()
        collection_lock = locks.Semaphore()

        def is_listing_page(url):
            """Return a truthy value when ``url`` is a further page to crawl."""
            if not re.search(self._page_pattern, url):
                return False
            if self._gumtree:
                return re.search('[a-z0-9]+$', url)
            prefix = self._base_fetch_url
            if prefix.endswith('.html'):
                # Match against the base URL without its '.html' suffix.
                prefix = prefix[:-5]
            return re.match(prefix, url)

        @gen.coroutine
        def fetch_url():
            current_url = yield q.get()
            try:
                if current_url in fetching:
                    return

                if self._verbose:
                    print('fetching %s' % current_url)

                fetching.add(current_url)
                urls = yield self.get_links_from_url(current_url)
                fetched.add(current_url)

                for new_url in urls:
                    # Only follow links beneath the base URL and next pages, remember offers
                    if re.search(self._offer_pattern, new_url) and re.match(
                            self._base_url, new_url):
                        # acquire() returns a Future; it must be yielded for
                        # the semaphore to actually be held.  The context
                        # manager releases it even if the body raises.
                        with (yield collection_lock.acquire()):
                            collection.add(new_url)

                    if is_listing_page(new_url):
                        yield q.put(new_url)

            finally:
                # Always balance the q.get() above so q.join() can complete.
                q.task_done()

        @gen.coroutine
        def worker():
            while True:
                yield fetch_url()

        q.put(self._base_fetch_url)

        # Start workers, then wait for the work queue to be empty.
        for _ in range(self._concurrency):
            worker()
        yield q.join(timeout=timedelta(seconds=300))
        assert fetching == fetched
        if self._verbose:
            print('Done in %d seconds, fetched %s URLs.' %
                  (time.time() - start, len(fetched)))

        self._links = list(collection)
Exemple #6
0
 def test_repr(self):
     """repr() reflects the lock state, the value and any waiters."""
     semaphore = locks.Semaphore()
     initial = repr(semaphore)
     self.assertIn('Semaphore', initial)
     self.assertIn('unlocked,value:1', initial)

     # After the first acquire the repr shows "locked" but no waiters.
     semaphore.acquire()
     after_first = repr(semaphore)
     self.assertIn('locked', after_first)
     self.assertNotIn('waiters', after_first)

     # A second acquire cannot complete, so waiters appear in the repr.
     semaphore.acquire()
     self.assertIn('waiters', repr(semaphore))
Exemple #7
0
    def test_acquire_timeout_preempted(self):
        """A release arriving before the deadline satisfies the waiter."""
        semaphore = locks.Semaphore(1)
        yield semaphore.acquire()

        # The release is scheduled at 0.01s, ahead of the waiter's 0.02s
        # deadline.
        self.io_loop.call_later(0.01, semaphore.release)
        waiter = semaphore.acquire(timedelta(seconds=0.02))
        yield gen.sleep(0.03)
        yield waiter  # Must resolve without raising TimeoutError.
Exemple #8
0
 def test_repr(self):
     """Check the repr text before and after acquiring the semaphore."""
     semaphore = locks.Semaphore()
     for fragment in ("Semaphore", "unlocked,value:1"):
         self.assertIn(fragment, repr(semaphore))

     # One acquire: reported as locked, with nobody queued.
     semaphore.acquire()
     locked_text = repr(semaphore)
     self.assertIn("locked", locked_text)
     self.assertNotIn("waiters", locked_text)

     # A further acquire shows up as a waiter.
     semaphore.acquire()
     self.assertIn("waiters", repr(semaphore))
Exemple #9
0
    def __init__(self, io_loop, hostname=None, agent_map=None, code_loader=True, environment=None, poolsize=1,
                 cricital_pool_size=5):
        """Initialise the agent's limiters, storage, environment and endpoints.

        :param io_loop: passed to the parent constructor.
        :param hostname: when given, registered as the only endpoint name;
            otherwise endpoint names are read from ``cfg.agent_names``.
        :param agent_map: falls back to ``cfg.agent_map`` when ``None``.
        :param code_loader: when true, a virtual env and a ``CodeLoader``
            are created from the storage paths; otherwise no loader is set.
        :param environment: falls back to ``cfg.environment`` when ``None``.
        :param poolsize: size of the regular semaphore and the thread pool.
        :param cricital_pool_size: size of the critical semaphore (the
            spelling is part of the existing signature).
        :raises Exception: when no environment is given or configured.
        """
        super().__init__(
            "agent",
            io_loop,
            timeout=cfg.server_timeout.get(),
            reconnect_delay=cfg.agent_reconnect_delay.get(),
        )

        self.poolsize = poolsize
        self.ratelimiter = locks.Semaphore(poolsize)
        self.critical_ratelimiter = locks.Semaphore(cricital_pool_size)
        self._sched = Scheduler(io_loop=self._io_loop)
        self.thread_pool = ThreadPoolExecutor(poolsize)

        self.agent_map = cfg.agent_map.get() if agent_map is None else agent_map
        self._storage = self.check_storage()

        # Resolve the environment: explicit argument first, then config.
        if environment is None:
            environment = cfg.environment.get()
        if environment is None:
            raise Exception("The agent requires an environment to be set.")
        self.set_environment(environment)

        self._instances = {}

        if not code_loader:
            self._loader = None
        else:
            self._env = env.VirtualEnv(self._storage["env"])
            self._env.use_virtual_env()
            self._loader = CodeLoader(self._storage["code"])

        # An explicit hostname takes precedence over configured agent names.
        if hostname is not None:
            self.add_end_point_name(hostname)
            return

        # load agent names from the config file
        configured_names = cfg.agent_names.get()
        if configured_names is None:
            return

        for raw_name in configured_names.split(","):
            endpoint = raw_name.strip()
            if "$" in endpoint:
                endpoint = endpoint.replace("$node-name", self.node_name)

            self.add_end_point_name(endpoint)
0
    def test_acquire_timeout(self):
        """Acquiring an exhausted semaphore with a timeout raises."""
        semaphore = locks.Semaphore(2)
        # Use up both slots.
        for _ in range(2):
            yield semaphore.acquire()

        with self.assertRaises(gen.TimeoutError):
            yield semaphore.acquire(timedelta(seconds=0.01))

        # A plain waiter is satisfied as soon as a slot is released.
        waiter = semaphore.acquire()
        semaphore.release()
        self.assertTrue(waiter.done())
    def test_context_manager_async_await(self):
        """``async with sem`` binds None and releases the semaphore on exit."""
        semaphore = locks.Semaphore()

        async def use_semaphore():
            async with semaphore as bound:
                self.assertTrue(bound is None)

        yield use_semaphore()

        # The semaphore was released when the block ended, so it can be
        # acquired again immediately.
        reacquired = semaphore.acquire()
        self.assertTrue(reacquired.done())
Exemple #12
0
    def test_release_unacquired(self):
        """Releasing without a prior acquire raises the semaphore's value."""
        # Unbounded releases are allowed, and increment the semaphore's value.
        semaphore = locks.Semaphore()
        for _ in range(2):
            semaphore.release()

        # The counter is now 3: three acquires complete at once ...
        for _ in range(3):
            self.assertTrue(semaphore.acquire().done())

        # ... and the fourth one has to wait.
        blocked = semaphore.acquire()
        self.assertFalse(blocked.done())
Exemple #13
0
    def test_context_manager_async_await(self):
        """Check that ``async with sem`` binds None and releases on exit.

        The coroutine is defined from a source string through ``exec_test``
        (defined elsewhere; presumably so that this file still parses on
        interpreters without ``async`` syntax -- TODO confirm).
        """
        # Repeat the above test using 'async with'.
        sem = locks.Semaphore()

        # NOTE: the source string below refers to the locals ``sem`` and
        # ``self`` by name (they are passed in via locals()), so these
        # names must not be changed.
        namespace = exec_test(globals(), locals(), """
        async def f():
            async with sem as yielded:
                self.assertTrue(yielded is None)
        """)
        # Run the coroutine that exec_test placed in the returned namespace.
        yield namespace['f']()

        # Semaphore was released and can be acquired again.
        self.assertTrue(sem.acquire().done())
Exemple #14
0
    def test_acquire_timeout(self):
        """A release that comes after the deadline does not rescue the waiter."""
        semaphore = locks.Semaphore(2)
        # Use up both slots.
        for _ in range(2):
            yield semaphore.acquire()

        timed_waiter = semaphore.acquire(timedelta(seconds=0.01))
        # The release fires at 0.02s, after the 0.01s deadline: too late.
        self.io_loop.call_later(0.02, semaphore.release)
        yield gen.sleep(0.3)
        with self.assertRaises(gen.TimeoutError):
            yield timed_waiter

        # Take the slot freed by the late release, then queue one more waiter.
        semaphore.acquire()
        waiter = semaphore.acquire()
        self.assertFalse(waiter.done())
        semaphore.release()
        self.assertTrue(waiter.done())
Exemple #15
0
    def test_context_manager_contended(self):
        """Two coroutines contending for the semaphore run one after another."""
        semaphore = locks.Semaphore()
        events = []

        @gen.coroutine
        def hold_briefly(index):
            releaser = yield semaphore.acquire()
            with releaser:
                events.append('acquired %d' % index)
                yield gen.sleep(0.01)
                events.append('release %d' % index)

        yield [hold_briefly(i) for i in range(2)]

        # Each coroutine must acquire and release before the next one starts.
        expected_events = [
            template % i
            for i in range(2)
            for template in ('acquired %d', 'release %d')
        ]

        self.assertEqual(expected_events, events)
Exemple #16
0
    def test_acquire(self):
        """Waiters are satisfied one per release, in the order they queued."""
        semaphore = locks.Semaphore()
        immediate = semaphore.acquire()
        self.assertTrue(immediate.done())

        # The next two acquires have to wait for release().
        first_waiter = semaphore.acquire()
        self.assertFalse(first_waiter.done())
        second_waiter = semaphore.acquire()

        semaphore.release()
        self.assertTrue(first_waiter.done())
        self.assertFalse(second_waiter.done())

        semaphore.release()
        self.assertTrue(second_waiter.done())

        # With no waiters left, a release makes the next acquire() instant.
        semaphore.release()
        instant = semaphore.acquire()
        self.assertTrue(instant.done())
Exemple #17
0
    def test_acquire(self):
        """Same acquire/release sequence, with acquires wrapped by asyncio."""
        semaphore = locks.Semaphore()
        immediate = asyncio.ensure_future(semaphore.acquire())
        self.assertTrue(immediate.done())

        # The next two acquires have to wait for release().
        first_waiter = asyncio.ensure_future(semaphore.acquire())
        self.assertFalse(first_waiter.done())
        second_waiter = asyncio.ensure_future(semaphore.acquire())

        semaphore.release()
        self.assertTrue(first_waiter.done())
        self.assertFalse(second_waiter.done())

        semaphore.release()
        self.assertTrue(second_waiter.done())

        # With no waiters left, a release makes the next acquire() instant.
        semaphore.release()
        instant = asyncio.ensure_future(semaphore.acquire())
        self.assertTrue(instant.done())
        self.assertEqual(0, len(semaphore._waiters))
Exemple #18
0
    def _get_all_documents(self):
        """Fetch the HTML of every URL in ``self._url_list`` concurrently.

        Worker coroutines pull URLs from a queue, download each one through
        ``self.get_html`` and append ``{'url': ..., 'html': ...}`` to a
        shared list.  On success the list is stored in ``self._html_list``.
        If the queue is not drained before the timeout, or if there are no
        URLs at all, ``self._html_list`` is set to an empty list.

        When the first URL contains 'gratka', two workers and an 1800 second
        timeout are used; otherwise ``self._concurrency`` workers and a
        300 second timeout.

        This is a generator-style coroutine body: it yields Tornado futures.
        """
        if not self._url_list:
            # Nothing to fetch; skip setting up the queue and workers.
            self._html_list = []
            return

        q = queues.Queue()
        html_list = []
        collection_lock = locks.Semaphore()

        # The provider check selects both the worker count and the timeout.
        is_gratka = re.search('gratka', self._url_list[0])
        worker_count = 2 if is_gratka else self._concurrency
        join_timeout = timedelta(seconds=1800 if is_gratka else 300)

        @gen.coroutine
        def fetch_url():
            current_url = yield q.get()
            try:
                html = yield self.get_html(current_url)

                # acquire() returns a Future; it must be yielded for the
                # semaphore to actually be held.  The context manager
                # releases it even if the body raises.
                with (yield collection_lock.acquire()):
                    html_list.append({'url': current_url, 'html': html})
            finally:
                # Always balance the q.get() above so q.join() can complete.
                q.task_done()

        @gen.coroutine
        def worker():
            while True:
                yield fetch_url()

        # Fill the queue, start workers, then wait for the queue to be empty.
        for url in self._url_list:
            q.put(url)
        for _ in range(worker_count):
            worker()

        try:
            yield q.join(timeout=join_timeout)
        except gen.TimeoutError:
            self._html_list = []
            print("Fetching from given provider did not succeed.")
        else:
            self._html_list = html_list
Exemple #19
0
    def test_garbage_collection(self):
        """Timed-out waiters are occasionally cleaned from the waiter queue."""
        semaphore = locks.Semaphore(value=0)
        timed_waiters = [
            semaphore.acquire(timedelta(seconds=0.01)) for _ in range(101)
        ]

        survivor = semaphore.acquire()
        self.assertEqual(102, len(semaphore._waiters))

        # Once the first 101 waiters time out, a collection is triggered
        # and only the untimed waiter remains queued.
        yield gen.sleep(0.02)
        self.assertEqual(1, len(semaphore._waiters))

        # The remaining waiter is still pending until a release arrives.
        self.assertFalse(survivor.done())
        semaphore.release()
        self.assertTrue(survivor.done())

        # Retrieve each exception to prevent "Future exception was never
        # retrieved" messages.
        for expired in timed_waiters:
            self.assertRaises(TimeoutError, expired.result)
Exemple #20
0
 def test_context_manager_misuse(self):
     """Using the semaphore itself in a plain ``with`` raises RuntimeError."""
     # "with sem" is a mistake; the correct form is
     # "with (yield sem.acquire())".
     semaphore = locks.Semaphore()
     with self.assertRaises(RuntimeError):
         with semaphore:
             pass
Exemple #21
0
 def test_yield_sem(self):
     """Yielding the semaphore itself raises BadYieldError."""
     # "with (yield sem)" is a mistake; the correct form is
     # "with (yield sem.acquire())".
     with self.assertRaises(gen.BadYieldError):
         yielded = yield locks.Semaphore()
         with yielded:
             pass