Beispiel #1
0
def requestURL(lq):
    """Consume download targets from ``lq`` and print each response body.

    Runs until a target with an empty ``'url'`` value is received, at which
    point the function returns.

    :param lq: queue-like object; ``lq.get()`` must return either ``None``
        (nothing to do right now) or a mapping with a ``'url'`` key.
    """
    from baseconfig import defaultconfig
    # The configuration does not depend on the target, so build it once
    # instead of on every loop iteration.
    config = defaultconfig()
    header = config.DEFAULT_REQUESTHEADER
    while True:
        target = lq.get()
        if target is None:
            # Nothing usable was handed out; back off before polling again.
            time.sleep(10)
            continue
        url = target['url']
        if url == '':
            # An empty URL ends this worker.
            return
        # requests expects the keyword ``headers``; the previous ``header``
        # keyword made every call fail with a TypeError.
        response = requests.get(url=url, headers=header)
        print(response.content)
Beispiel #2
0
def startDownloader(lq):
    """Keep ``lq`` topped up with URL records fetched through ``DBOperator``.

    Every 15 seconds the number of free slots (configured
    ``DOWNLOADER_QUEUE_SIZE`` minus ``lq.qsize()``) is computed; when there
    is room, that many records are requested from the database and put on
    the queue one by one. This function never returns.

    :param lq: queue-like object providing ``qsize()`` and ``put()``.
    """
    from dbOperator import DBOperator
    from baseconfig import defaultconfig
    conf = defaultconfig()
    dbo = DBOperator(config=conf)
    while True:
        getSize = conf.DOWNLOADER_QUEUE_SIZE - lq.qsize()
        if getSize > 0:
            # Only ask for as many records as there are free slots, so the
            # queue is not pushed past the configured maximum.
            urls = dbo.getURL(maxdataLength=getSize)
            print('Process #%s Get %s URLS FOR Request' % (str(
                os.getpid()), str(len(urls))))
            for i in urls:
                # Enqueue each record individually; previously the whole
                # list was enqueued once per element.
                lq.put(i)
        else:
            print('Reach Max Queue Size Wait...')
        time.sleep(15)
Beispiel #3
0
def start():
    """Launch the worker pool and block until the dispatcher sends ``end``.

    Subscribes to every message published on ``tcp://127.0.0.1:32768``,
    submits ``startProcessor`` to a pool of ``config.DOWNLOADER_PROCESS``
    processes, then waits for the end signal before terminating the pool.
    """
    from baseconfig import defaultconfig
    config = defaultconfig()
    zContext = zmq.Context()
    zSocket = zContext.socket(zmq.SUB)
    zSocket.connect('tcp://127.0.0.1:32768')
    # Empty prefix subscribes to all messages (b'' is the same value as ''
    # under Python 2).
    zSocket.setsockopt(zmq.SUBSCRIBE, b'')
    processPool = Pool(processes=config.DOWNLOADER_PROCESS)
    # NOTE(review): two tasks are submitted regardless of the pool size
    # configured above -- confirm that this is intended.
    for i in range(0, 2):
        processPool.apply_async(startProcessor, ())
    print('Wait Dispatcher Signal...')
    try:
        # Messages other than the end signal are ignored. Previously any
        # other first message fell through to join() on a pool that had been
        # neither closed nor terminated, which raises.
        while zSocket.recv() != b'end':
            pass
        print('Receive Dispatcher End Signal...')
    finally:
        # join() is only valid after close() or terminate().
        processPool.terminate()
        processPool.join()
        zSocket.close()
Beispiel #4
0
        collection = self.getCollection(self.instanceConf.DB_WAITURL)
        waitforinsert = []
        for i in urls:
            result = collection.find_one(filter={'url': i['url']})
            if result == None:
                waitforinsert.append(i)
        if waitforinsert != []:
            collection.insert_many(waitforinsert)

    def getURL(self, maxdataLength=100):
        """Claim up to ``maxdataLength`` waiting URL documents.

        Each iteration flips one document in the ``DB_WAITURL`` collection
        from ``URLStatus.WAITFORHANDLE`` to ``URLStatus.PROCESSING`` with a
        single ``find_one_and_update`` call and collects the document that
        call returns.

        :param maxdataLength: maximum number of documents to claim.
        :return: list of the claimed documents; shorter than
            ``maxdataLength`` (possibly empty) when no more waiting
            documents are found.
        """
        collection = self.getCollection(self.instanceConf.DB_WAITURL)
        result = []
        for _ in range(maxdataLength):
            rs = collection.find_one_and_update(
                filter={'status': URLStatus.WAITFORHANDLE},
                update={'$set': {
                    'status': URLStatus.PROCESSING
                }})
            # No document matched the filter: nothing is left to claim.
            if rs is None:
                break
            result.append(rs)
        return result


if __name__ == '__main__':
    # Manual smoke run: build a DBOperator from the default configuration
    # and call its initDB() routine.
    from baseconfig import defaultconfig
    dbo = DBOperator(config=defaultconfig())
    dbo.initDB()
    print('Unit Test Finished.')
Beispiel #5
0
            urls = dbo.getURL(maxdataLength=conf.DOWNLOADER_QUEUE_SIZE)
            print 'Process #%s Get %s URLS FOR Request' % (str(
                os.getpid()), str(len(urls)))
            for i in urls:
                localQueue.put(urls)
        else:
            print 'Reach Max Queue Size Wait...'
        time.sleep(15)


def start():
    """Launch the worker pool and block until the dispatcher sends ``end``.

    Subscribes to every message published on ``tcp://127.0.0.1:32768``,
    submits ``startProcessor`` to a pool of ``config.DOWNLOADER_PROCESS``
    processes, then waits for the end signal before terminating the pool.
    """
    from baseconfig import defaultconfig
    config = defaultconfig()
    zContext = zmq.Context()
    zSocket = zContext.socket(zmq.SUB)
    zSocket.connect('tcp://127.0.0.1:32768')
    # Empty prefix subscribes to all messages (b'' is the same value as ''
    # under Python 2).
    zSocket.setsockopt(zmq.SUBSCRIBE, b'')
    processPool = Pool(processes=config.DOWNLOADER_PROCESS)
    # NOTE(review): two tasks are submitted regardless of the pool size
    # configured above -- confirm that this is intended.
    for i in range(0, 2):
        processPool.apply_async(startProcessor, ())
    print('Wait Dispatcher Signal...')
    try:
        # Messages other than the end signal are ignored. Previously any
        # other first message fell through to join() on a pool that had been
        # neither closed nor terminated, which raises.
        while zSocket.recv() != b'end':
            pass
        print('Receive Dispatcher End Signal...')
    finally:
        # join() is only valid after close() or terminate().
        processPool.terminate()
        processPool.join()
        zSocket.close()


if __name__ == '__main__':
    defaultconfig = defaultconfig()