Esempio n. 1
0
def test_celery():
    """Push four string items through a celery-mode ``Worker``.

    Returns whatever ``Worker.work`` returns for the items and settings
    built below.
    """
    # Earlier experiments with celery group/chord/chain composition were
    # removed from here; only the Worker-based path remains.
    items = list('1234')
    settings = dict(
        celery_worker='test.functional.test_celery_group_little.simple',
        worker='test.functional.test_celery_group_little.worker_do_sth',
        celery_max_workers=1,
        celery_chunk_size=2,
        chunk_size=2,
        queue='a1',
        dummy='test.functional.test_celery.dummy',
    )
    return Worker(mode='celery').work(items, settings)
Esempio n. 2
0
def test(self, data, info):
    """Re-dispatch ``data`` to a thread-mode ``Worker`` using the sub-settings.

    A deep copy of ``info`` is made, and its ``chunk_size`` / ``worker``
    entries are overwritten with ``info['sub_chunk_size']`` /
    ``info['sub_worker']``. The worker's response is not returned.
    """
    marker = '1111'
    print(data, marker)
    print(info, marker)

    # Work on a copy so the incoming ``info`` stays untouched.
    nested = deepcopy(info)
    nested.update(
        chunk_size=info['sub_chunk_size'],
        worker=info['sub_worker'],
    )

    Worker(mode='thread').work(data, nested)
Esempio n. 3
0
def xtest_it():
    """Run a doubled 20-item list through a coroutine-mode ``Worker``.

    Prints a start marker, then prints the response from ``Worker.work``.
    """
    print('>>>>')
    rows = (
        ('u11', 'u22', 'u33', 'u44'),
        ('u21', 'u22', 'u23', 'u24'),
        ('u31', 'u32', 'u33', 'u34'),
        ('u41', 'u42', 'u43', 'u44'),
        ('u51', 'u52', 'u53', 'u54'),
    )
    # Flatten the rows, then repeat the whole sequence once more.
    flat = [name for row in rows for name in row]
    items = flat + flat
    settings = dict(
        worker='test.functional.test_worker_coroutine.worker_do_sth',
        chunk_size=4,
    )
    print(Worker(mode='coroutine').work(items, settings))
Esempio n. 4
0
def xtest_celery():
    """Run 20 items through a celery-mode ``Worker`` with callbacks configured.

    Returns whatever ``Worker.work`` returns for the items and settings
    built below.
    """
    # Earlier experiments with celery group/chord/chain composition were
    # removed from here; only the Worker-based path remains.
    rows = (
        ('u11', 'u22', 'u33', 'u44'),
        ('u21', 'u22', 'u23', 'u24'),
        ('u31', 'u32', 'u33', 'u34'),
        ('u41', 'u42', 'u43', 'u44'),
        ('u51', 'u52', 'u53', 'u54'),
    )
    items = [name for row in rows for name in row]
    # Disabled options kept for reference:
    #   'final_callback': 'test.functional.test_celery.final_callback_si'
    #   'queue': 'worker'
    settings = dict(
        celery_worker='test.functional.test_celery.simple',
        worker='test.functional.test_celery.worker_do_sth',
        celery_max_workers=1,
        celery_chunk_size=40,
        chunk_size=40,
        dummy='test.functional.test_celery.dummy',
        sync_callback='test.functional.test_celery.final_callback',
        each_callback='test.functional.test_celery.callback',
    )
    return Worker(mode='celery').work(items, settings)
Esempio n. 5
0
        'netboy.celery.tasks.pycurl_worker',
        'worker':
        'netboy.celery.tasks.multicurl_worker_do_crawl',
        'celery_max_workers':
        4,
        'celery_chunk_size':
        10,
        'chunk_size':
        5,
        'queue':
        'worker',
        'dummy':
        'netboy.celery.tasks.dummy',
        'filter': ['url', 'cookielist'],
        'triggers': [
            {
                'hello': 'world'
            },
            {
                'hello2': 'world2'
            },
            {
                'trigger': 'netboy.support.trigger.trig_it'
            },
        ],
        'analysers': ['netboy.support.analysers.analyse_it']
    }
    worker = Worker(mode='celery')
    resp = worker.work(data, info)
    print(resp)
Esempio n. 6
0
def work_do_sth(data, info):
    """Call ``get_picture`` on each element of ``data``, in order.

    ``info`` is accepted but not read by this function.
    """
    for item in data:
        get_picture(item)


if __name__ == "__main__":
    # Script entry point: hand six integers to a thread-mode Worker, two per
    # chunk, using the worker named by the dotted path in ``info``.
    # Disabled experiment:
    # a = get_picture()
    # set(a)

    a = time.time()  # start timestamp; not read again in the visible code
    data = [1, 2, 3, 4, 5, 6]
    # NOTE(review): 'pachong.work_do_sth' presumably names work_do_sth in a
    # module called pachong — confirm against how Worker resolves it.
    info = {'worker': 'pachong.work_do_sth', 'chunk_size': 2}
    ww = Worker(mode="thread")
    resp = ww.work(data, info)

    # Crawl directly (sequential alternative, disabled)
    # for i in data:
    #     get_picture(i)

    # One thread per item (disabled alternative)
    # for i in list(range(1,7)):
    #     th1 = threading.Thread(target=get_picture, args=(i,))
    #     th1.start()

    # Two hand-started threads (disabled alternative)
    # th1 = threading.Thread(target=get_picture, args=(1,))
    #
    # th1.start()
    # th2 = threading.Thread(target=get_picture, args=(2,))
    # th2.start()
    # th1.join()
Esempio n. 7
0
class NetBoy:
    """Fluent builder for the ``info`` dict that is handed to ``Worker.work``.

    Each ``use_*`` method writes one or more keys into ``self.info`` and
    returns ``self`` so calls can be chained. ``run`` and ``run_remote``
    consume the accumulated configuration.
    """

    def __init__(self, info=None):
        """Start from a copy of ``info`` (if given) plus the default keys."""
        # Shallow-copy so the defaults written below, and later ``use_*``
        # calls, do not leak into the caller's dict.
        self.info = dict(info) if info else {}
        self.info['dummy'] = 'netboy.celery.tasks.dummy'
        self.info['log'] = 'netboy'

    def _use_proxy(self, proxytype, proxy):
        """Record ``proxy`` (``'host:port'``) under the given proxy type.

        Raises ``IndexError`` when ``proxy`` contains no colon and
        ``ValueError`` when the port part is not an integer.
        """
        # Split on the LAST colon only, so a host that itself contains
        # colons (e.g. an IPv6 literal) keeps its full text.
        parts = proxy.rsplit(':', 1)
        self.info['proxytype'] = proxytype
        self.info['proxy'] = parts[0]
        self.info['proxyport'] = int(parts[1])
        return self

    def use_socks5_proxy(self, proxy):
        """Configure a SOCKS5 proxy from a ``'host:port'`` string."""
        return self._use_proxy('socks5', proxy)

    def use_http_proxy(self, proxy):
        """Configure an HTTP proxy from a ``'host:port'`` string."""
        return self._use_proxy('http', proxy)

    def use_queue(self, queue):
        """Store ``queue`` under ``info['queue']``."""
        self.info['queue'] = queue
        return self

    def use_logger(self, logger):
        """Store ``logger`` under ``info['log']`` (default is ``'netboy'``)."""
        self.info['log'] = logger
        return self

    def use_filter(self, result_filter):
        """Store ``result_filter`` under ``info['filter']``."""
        self.info['filter'] = result_filter
        return self

    def use_prepares(self, prepares):
        """Store ``prepares`` under ``info['prepares']``."""
        self.info['prepares'] = prepares
        return self

    def use_triggers(self, triggers):
        """Store ``triggers`` under ``info['triggers']``."""
        self.info['triggers'] = triggers
        return self

    def use_analysers(self, analysers):
        """Store ``analysers`` under ``info['analysers']``."""
        self.info['analysers'] = analysers
        return self

    def use_auth(self, user, password, group='default'):
        """Store the credentials as a dict under ``info['auth']``."""
        self.info['auth'] = {
            'user': user,
            'password': password,
            'group': group,
        }
        return self

    def use_useragent(self, useragent):
        """Store ``useragent`` under ``info['useragent']``."""
        self.info['useragent'] = useragent
        return self

    def use_timeout(self, timeout=None, connect=None, wait=None, script=None):
        """Store each timeout that is truthy; falsy values are left unset.

        Keys written: ``timeout``, ``connecttimeout``, ``wait`` and
        ``script_timeout``.
        """
        if timeout:
            self.info['timeout'] = timeout
        if connect:
            self.info['connecttimeout'] = connect
        if wait:
            self.info['wait'] = wait
        if script:
            self.info['script_timeout'] = script
        return self

    def use_info(self, info):
        """Replace the whole configuration with ``info``.

        The dict is used as-is: it is not copied, and the defaults set by
        ``__init__`` are not re-applied.
        """
        self.info = info
        return self

    def use_final(self, final):
        """Store ``final`` under ``info['final']``."""
        self.info['final'] = final
        return self

    def use_mode(self, mode):
        """Record the execution mode and refresh the spider-dependent keys.

        An already-configured spider is re-applied under the new mode;
        otherwise ``'coroutine'`` mode selects the ``'aiohttp'`` spider.
        """
        self.info['mode'] = mode
        spider = self.info.get('spider')
        if spider:
            self.use_spider(spider)
        elif mode == 'coroutine':
            self.use_spider('aiohttp')
        return self

    def use_spider(self, spider='pycurl'):
        """Record the spider and set the worker paths for the current mode.

        The mode is read from ``info['mode']`` and defaults to ``'thread'``.
        Combinations not listed below leave the worker keys untouched.
        """
        self.info['spider'] = spider
        mode = self.info.get('mode', 'thread')
        if mode == 'celery':
            if spider == 'pycurl':
                self.info['celery_worker'] = (
                    'netboy.celery.tasks.pycurl_worker')
                self.info['worker'] = (
                    'netboy.celery.tasks.multicurl_worker_do_crawl')
                self.info['final_callback'] = (
                    'netboy.celery.tasks.final_callback')
            elif spider == 'chrome':
                self.info['celery_worker'] = (
                    'netboy.celery.tasks.thread_worker')
                self.info['worker'] = (
                    'netboy.celery.tasks.chrome_worker_do_crawl')
                self.info['final_callback'] = (
                    'netboy.celery.tasks.final_callback')
        elif mode == 'coroutine' and spider == 'aiohttp':
            self.info['worker'] = (
                'netboy.aio_http.aiohttp_handler.aiohttp_handler')
        else:
            if spider == 'pycurl':
                self.info['worker'] = (
                    'netboy.multi_pycurl.multicurl_handler.curl_handler')
            elif spider == 'chrome':
                self.info['worker'] = (
                    'netboy.selenium_chrome.chrome_driver_handler'
                    '.chrome_driver_handler')
            elif spider == 'aiohttp':
                # NOTE(review): this path ends in ``aio_http_handler`` while
                # the coroutine branch above uses ``aiohttp_handler`` — one
                # of the two is probably a typo; confirm against the module.
                self.info['worker'] = (
                    'netboy.aio_http.aiohttp_handler.aio_http_handler')
        return self

    def use_workers(self, workers=8, chunk_size1=40, chunk_size2=8):
        """Set the worker count and the two chunk sizes.

        ``workers`` is stored as both ``celery_max_workers`` and
        ``max_workers``. The larger chunk size becomes
        ``celery_chunk_size`` and the smaller one ``chunk_size``, whatever
        order they are passed in.
        """
        self.info['celery_max_workers'] = workers
        self.info['max_workers'] = workers
        self.info['celery_chunk_size'] = max(chunk_size1, chunk_size2)
        self.info['chunk_size'] = min(chunk_size1, chunk_size2)
        return self

    def use_chrome(self, chrome):
        """Store ``chrome`` under ``info['chrome']`` and select the chrome spider."""
        self.info['chrome'] = chrome
        self.use_spider('chrome')
        return self

    def use_window(self, window):
        """Store ``window`` as ``info['window_size']`` and select the chrome spider."""
        self.info['window_size'] = window
        self.use_spider('chrome')
        return self

    def run(self, data):
        """Run ``data`` through a ``Worker`` in the configured mode.

        The mode defaults to ``'thread'``. Returns the result of
        ``Worker.work(data, self.info)``.
        """
        self.worker = Worker(mode=self.info.get('mode', 'thread'))
        return self.worker.work(data, self.info)

    def run_remote(self, url, data, callback_data=None):
        """POST the configuration and ``data`` to ``url`` via ``curl_work``.

        When ``callback_data`` is truthy, a ``post_it`` trigger carrying
        those fields is added to ``self.info['triggers']`` before sending.
        Returns whatever ``curl_work`` returns.
        """
        self.worker = Worker(mode=self.info.get('mode', 'thread'))
        if callback_data:
            trigger_payload = {'trigger': 'netboy.support.triggers.post_it'}
            trigger_payload.update(callback_data)
            # NOTE(review): the trigger is stored on ``self.info``, so
            # repeated calls keep appending triggers — confirm this is
            # intended.
            if self.info.get('triggers'):
                self.info['triggers'].append(trigger_payload)
            else:
                self.info['triggers'] = [trigger_payload]

        payload = {
            'url': url,
            'method': 'post',
            'postfields': {
                'info': copy(self.info),
                'data': data,
            },
        }
        return curl_work(payload, logger='netboy')

    def register_remote(self, url, user, password, group='default'):
        """POST the given credentials to ``url`` via ``curl_work``.

        Returns whatever ``curl_work`` returns.
        """
        payload = {
            'url': url,
            'method': 'post',
            'postfields': {
                'user': user,
                'password': password,
                'group': group,
            },
        }
        return curl_work(payload, logger='netboy')