Example #1
0
def dealer_process():
    """Endlessly batch domains that are due for update into crawler tasks.

    This is a generator: every ``yield`` hands the result of a ``Storage``
    call to whatever drives the generator and resumes with the value that is
    sent back (presumably a coroutine runner whose decorator is not visible
    here -- confirm).

    Each loop iteration:

    1. Requests up to ``options.dealer_domains_per_task`` domains.
    2. If a non-empty but short batch came back, sleeps
       ``options.dealer_fetch_task_sleep_period_sec`` seconds and fetches
       once more; the second result replaces the first.
    3. If nothing was fetched, sleeps ``options.dealer_sleep_period_sec``
       seconds and starts the next iteration.
    4. Otherwise submits the batch with ``Q.add_crawler_task`` and then
       passes the same batch to ``Storage.update_domains_after_fetch``.
    """
    app_log.info('start dealer process')
    log_fds('start')
    log_mem('start')
    s = Storage()
    q = Q()

    while True:
        log_fds('start loop')
        log_mem('start loop')
        domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)
        if domains and len(domains) < options.dealer_domains_per_task:
            # Short batch: wait, then fetch again in the hope of a fuller one.
            # NOTE(review): time.sleep blocks the calling thread; if this
            # generator is driven by an event loop, a non-blocking sleep may
            # be more appropriate -- confirm.
            time.sleep(options.dealer_fetch_task_sleep_period_sec)
            domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)

        if not domains:
            app_log.info("not found domains")
            time.sleep(options.dealer_sleep_period_sec)
            continue

        # Values are passed as logger arguments rather than pre-formatted
        # with ``%``: formatting is deferred to the logger, and a tuple value
        # is rendered as a single ``%s`` argument instead of being unpacked.
        app_log.info("fetch %d domains for new task", len(domains))
        res = q.add_crawler_task(domains)
        yield s.update_domains_after_fetch(domains)
        app_log.info("add task %s", res)
        # Drop the reference to the batch before the next fetch.
        del domains

    # Unreachable as written: the loop above has no ``break`` or ``return``.
    app_log.info('end dealer process')
Example #2
0
def dealer_process():
    """Endlessly batch domains that are due for update into crawler tasks.

    This is a generator: every ``yield`` hands the result of a ``Storage``
    call to whatever drives the generator and resumes with the value that is
    sent back (presumably a coroutine runner whose decorator is not visible
    here -- confirm).

    Each loop iteration:

    1. Requests up to ``options.dealer_domains_per_task`` domains.
    2. If a non-empty but short batch came back, sleeps
       ``options.dealer_fetch_task_sleep_period_sec`` seconds and fetches
       once more; the second result replaces the first.
    3. If nothing was fetched, sleeps ``options.dealer_sleep_period_sec``
       seconds and starts the next iteration.
    4. Otherwise submits the batch with ``Q.add_crawler_task`` and then
       passes the same batch to ``Storage.update_domains_after_fetch``.
    """
    app_log.info('start dealer process')
    log_fds('start')
    log_mem('start')
    s = Storage()
    q = Q()

    while True:
        log_fds('start loop')
        log_mem('start loop')
        domains = yield s.fetch_domains_for_update(
            options.dealer_domains_per_task)
        if domains and len(domains) < options.dealer_domains_per_task:
            # Short batch: wait, then fetch again in the hope of a fuller one.
            # NOTE(review): time.sleep blocks the calling thread; if this
            # generator is driven by an event loop, a non-blocking sleep may
            # be more appropriate -- confirm.
            time.sleep(options.dealer_fetch_task_sleep_period_sec)
            domains = yield s.fetch_domains_for_update(
                options.dealer_domains_per_task)

        if not domains:
            app_log.info("not found domains")
            time.sleep(options.dealer_sleep_period_sec)
            continue

        # Values are passed as logger arguments rather than pre-formatted
        # with ``%``: formatting is deferred to the logger, and a tuple value
        # is rendered as a single ``%s`` argument instead of being unpacked.
        app_log.info("fetch %d domains for new task", len(domains))
        res = q.add_crawler_task(domains)
        yield s.update_domains_after_fetch(domains)
        app_log.info("add task %s", res)
        # Drop the reference to the batch before the next fetch.
        del domains

    # Unreachable as written: the loop above has no ``break`` or ``return``.
    app_log.info('end dealer process')
Example #3
0
def parser_process():
    """Poll the queue for parser tasks and run each one, forever.

    This is a generator: it yields the result of ``Parser.run`` to whatever
    drives it (presumably a coroutine runner -- confirm) and continues once
    resumed.

    When ``Q.get_parser_task`` returns a falsy value the loop logs that no
    task was found and sleeps ``options.parser_sleep_period_sec`` seconds.
    Otherwise element 2 of the task is passed to the parser and, after the
    yield resumes, element 0 is passed to ``Q.complete_task``.
    """
    app_log_process('start parser process')
    log_fds('start')
    log_mem('start')
    task_queue = Q()
    storage = Storage()
    task_parser = Parser(storage)

    while True:
        log_fds('start loop')
        log_mem('start loop')
        job = task_queue.get_parser_task()

        if not job:
            # Nothing queued: back off before polling again.
            app_log_process("not found task")
            time.sleep(options.parser_sleep_period_sec)
            continue

        yield task_parser.run(job[2])
        # Only reached after the driver resumes the generator.
        task_queue.complete_task(job[0])

    # Unreachable as written: the loop above has no ``break`` or ``return``.
    app_log_process('end parser process')
Example #4
0
def crawler_process():
    """Poll the queue for crawler tasks and run a crawler for each, forever.

    This is a generator: it yields the result of ``Crawler.run`` to whatever
    drives it (presumably a coroutine runner -- confirm) and continues once
    resumed.

    When ``Q.get_crawler_task`` returns a falsy value the loop logs that no
    task was found and sleeps ``options.crawler_sleep_period_sec`` seconds.
    Otherwise a new ``Crawler`` is built from element 2 of the task plus the
    shared queue and storage objects; after the yield resumes, element 0 of
    the task is passed to ``Q.complete_task``.
    """
    app_log_process('start crawler process')
    log_fds('start')
    log_mem('start')
    task_queue = Q()
    storage = Storage()

    while True:
        log_fds('start loop')
        log_mem('start loop')
        job = task_queue.get_crawler_task()

        if not job:
            # Nothing queued: back off before polling again.
            app_log_process("not found task")
            time.sleep(options.crawler_sleep_period_sec)
            continue

        worker = Crawler(job[2], task_queue, storage)
        yield worker.run()
        # Only reached after the driver resumes the generator.
        task_queue.complete_task(job[0])
        # Drop the local reference to the crawler before the next iteration.
        del worker

    # Unreachable as written: the loop above has no ``break`` or ``return``.
    app_log_process('end crawler process')
    log_fds('end')
Example #5
0
def crawler_process():
    """Poll the queue for crawler tasks and run a crawler for each, forever.

    This is a generator: it yields the result of ``Crawler.run`` to whatever
    drives it (presumably a coroutine runner -- confirm) and continues once
    resumed.

    When ``Q.get_crawler_task`` returns a falsy value the loop logs that no
    task was found and sleeps ``options.crawler_sleep_period_sec`` seconds.
    Otherwise a new ``Crawler`` is built from element 2 of the task plus the
    shared queue and storage objects; after the yield resumes, element 0 of
    the task is passed to ``Q.complete_task``.
    """
    app_log_process('start crawler process')
    log_fds('start')
    log_mem('start')
    task_queue = Q()
    storage = Storage()

    while True:
        log_fds('start loop')
        log_mem('start loop')
        job = task_queue.get_crawler_task()

        if not job:
            # Nothing queued: back off before polling again.
            app_log_process("not found task")
            time.sleep(options.crawler_sleep_period_sec)
            continue

        worker = Crawler(job[2], task_queue, storage)
        yield worker.run()
        # Only reached after the driver resumes the generator.
        task_queue.complete_task(job[0])
        # Drop the local reference to the crawler before the next iteration.
        del worker

    # Unreachable as written: the loop above has no ``break`` or ``return``.
    app_log_process('end crawler process')
    log_fds('end')