Beispiel #1
0
def main():
    """Parse command-line options, open the listening sockets and serve.

    With no positional arguments, the single socket returned by
    ``fcgi_stdin_sock()`` is used.  Otherwise every argument must be a
    ``host:port`` pair (square brackets around the host are stripped, so
    bracketed IPv6 literals are accepted) and one TCP listener is opened per
    argument.

    Malformed arguments, including a non-numeric port, are reported through
    ``parser.error``, which prints the usage message and exits the process.
    """
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog [options]")
    parser.add_option("-D",
                      "--debug",
                      dest="debug",
                      help="debug mode.",
                      action="store_true",
                      default=False)
    parser.add_option("-c",
                      "--cpool",
                      dest="cpool_size",
                      help="size of coroutine pool.",
                      metavar="<size>",
                      type="int",
                      default=512)
    opts, args = parser.parse_args()
    sockets = []
    pool = coros.CoroutinePool(max_size=opts.cpool_size)

    if not args:
        sockets.append(fcgi_stdin_sock())
    else:
        for arg in args:
            # todo: add support for UNIX sockets
            # Drop the brackets, then split on the LAST colon so that colons
            # inside an IPv6 host part stay with the host.
            addr = arg.replace('[', '').replace(']', '').rsplit(':', 1)
            if len(addr) != 2:
                parser.error(
                    'arguments must be IPv4 or IPv6 addresses including port')
            host, port_text = addr
            try:
                port = int(port_text)
            except ValueError:
                # parser.error() exits, so ``port`` is always bound below.
                parser.error(
                    'invalid port %r in argument %r' % (port_text, arg))
            sockets.append(api.tcp_listener((host, port)))

    run(sockets, handle_connection, pool)
 def test_a_buncha_stuff(self):
     """Schedule ``sender_loop`` nine times on a ten-slot pool and wait for all."""
     workers = coros.CoroutinePool(max_size=10)
     # One pool job per index 0..8; each execute() call yields a waiter.
     pending = [workers.execute(sender_loop, n) for n in range(9)]
     for job in pending:
         job.wait()
    def test_contention(self):
        """Read three attributes of a proxied module from pooled coroutines.

        ``tpool_test`` is wrapped in a ``tpool.Proxy`` and three jobs are
        scheduled on a four-slot coroutine pool; each job asserts the value
        of one proxied attribute.  The test then waits on every job.
        """
        from greentest import tpool_test
        prox = tpool.Proxy(tpool_test)

        pool = coros.CoroutinePool(max_size=4)
        # assertEqual is used instead of the deprecated assertEquals alias,
        # which no longer exists in current unittest releases.
        waiters = [
            pool.execute(lambda: self.assertEqual(prox.one, 1)),
            pool.execute(lambda: self.assertEqual(prox.two, 2)),
            pool.execute(lambda: self.assertEqual(prox.three, 3)),
        ]
        for waiter in waiters:
            waiter.wait()
    def __init__(self, queue=None, max_size=DEFAULT_MAX_CONC_REQUESTS):
        """Set up the logger, the backing queue and the client pool.

        queue -- optional pre-built queue to serve from; a new ``Queue()`` is
            created only when this is ``None``.
        max_size -- limit handed to the client ``CoroutinePool``.
        """

        self.log = log.get_logger('zenq.server.native:%x' % (id(self), ))

        # An initial queue may be provided; this might help with durable queues
        # (i.e. those that save their state to disk and can restore it on load).
        # Compare against None explicitly: a supplied queue that happens to be
        # falsy (e.g. an empty one defining __len__) must not be swapped for a
        # fresh Queue().
        self.queue = queue if queue is not None else Queue()

        # The client pool is a pool of coroutines which doesn't allow more than
        # max_size coroutines to be running 'at the same time' (although
        # strictly speaking they never do anyway). In this case it represents
        # the maximum number of clients that may be connected at once.
        self.client_pool = coros.CoroutinePool(max_size=max_size)

        # No listening socket yet.
        self.socket = None
Beispiel #5
0
    queue.put(feed['url'])
    
    while not queue.empty():
        latestDateTime = crawlFeedOnePage(feed, queue, crawlDateTime, latestWarc, warcDateTime, warcDateTime, tempdir)
    
    print "Finished crawling %s, whose feed was last updated on %s" % (feed['domain'], latestDateTime.isoformat())
        
    os.rmdir(tempdir)
    latestWarc.destroy()

    ### Create a new warc everytime instead of adding to an old one
    #renameWarc(warcFileName, feed['domain'], domain_warc_dir, latestDateTime)
    
# __main__
#_______________________________________________________________________________

# Refuse to start without the WARC directory.  An explicit check is used
# instead of ``assert`` so that it still runs under ``python -O``.
if not os.path.exists(config['warc_dir']):
    raise IOError("warc_dir does not exist: %s" % (config['warc_dir'],))
writeLockFile(config['warc_dir'])

try:
    crawlDateTime = datetime.datetime.utcnow()
    # At most four feeds are crawled concurrently.
    pool = coros.CoroutinePool(max_size=4)
    waiters = [pool.execute(crawlDomain, feed, crawlDateTime)
               for feed in feeds]

    # wait until all feeds have been fetched
    for waiter in waiters:
        waiter.wait()
finally:
    # Remove the lock file even when a crawl raised, so a failed run does
    # not leave a stale lock behind.
    rmLockFile(config['warc_dir'])