def crawl_spider(spider):

    ############### TRAIN SPIDER ##############
    if spider == "train":
        # initialize the scheduling queue
        q = Scheduler()

        # initialize all of the pipelines
        pipeline = []
        for pipe in settings.PIPELINES:
            try:
                pipeline.append(getattr(pipelines, pipe)())
            except Exception:
                print "Error: Unable to initialize %s pipe" % pipe
                quit()

        # initialize the spider
        # try:
        #     s = getattr(spiders, spider)()
        # except Exception:
        #     print "Error: It's likely that the input spider does not exist in spiders.py"
        #     quit()
        s = spiders.Train()
        #print s.__doc__

        # add all of the start links and known links to the top level of the queue
        for url in list(s.start_urls) + list(s.known_urls):
            q.add_link(url, 0)
        q.print_queue()

        # request urls while the scheduler is not empty and pass each response to the spider;
        # add returned links to the queue and send returned items down the pipeline
        visits = 0
        while not q.is_empty():
            wait_between_requests()  # wait a random small amount of time so we're less detectable
            url, level = q.get_next_link(what_level=True)
            print "Visit #%i, Q level %i, Q volume %i" % (visits, level, q.queue_volume())
            response = get_request(url)
            if response:
                # parse() returns the scraped items and the newly extracted links
                items, extracted_links = s.parse(response, level=level)
                add_to_queue(q, extracted_links)        # manage the returned links
                send_down_pipeline(pipeline, items, s)  # manage the returned items
            if settings.ASK_BETWEEN_REQUESTS:
                raw_input("Press ENTER to continue...")
            visits += 1

        if q.is_empty():
            print "CRAWL IS FINISHED: Queue is empty"
        #if visits >= settings.MAX_CRAWLS: print "CRAWL IS FINISHED: Crawled max number of urls (%i total)" % visits

    ################ TEST SPIDER ##############
    elif spider == "test":
        print "Test case"
        q = PriorityQueue()
        queued_links = set()

        # initialize all of the pipelines
        pipeline = []
        for pipe in settings.PIPELINES:
            try:
                pipeline.append(getattr(pipelines, pipe)())
            except Exception:
                print "Error: Unable to initialize %s pipe" % pipe
                quit()

        # initialize the spider
        # try:
        #     s = spiders.Test()
        # except Exception:
        #     print "Error: It's likely that the input spider does not exist in spiders.py"
        #     quit()
        s = spiders.Test()
        #print s.__doc__

        # seed the queue with the first start url; PriorityQueue pops the smallest
        # value first, so levels are stored negated to pop higher levels first
        q.put((-.1, s.start_urls[0]))
        queued_links.add(s.start_urls[0])

        # request urls while the queue is not empty and pass each response to the spider;
        # add returned links to the queue and send returned items down the pipeline
        visits = 0
        while not q.empty():
            wait_between_requests()  # wait a random small amount of time so we're less detectable
            priority, url = q.get()
            print "Q get:", -priority, url
            print "Visit #%i, Q volume %i" % (visits, q.qsize())
            response = get_request(url)
            if response:
                # parse() returns the scraped items and the newly extracted (level, url) links
                items, extracted_links = s.parse(response, level=-priority)
                for link in extracted_links:
                    if link[1] not in queued_links:
                        q.put((-link[0], link[1]))
                        queued_links.add(link[1])
                    # else:
                    #     print "We already queued %s" % link[1]
                send_down_pipeline(pipeline, items, s)  # manage the returned items
            if settings.ASK_BETWEEN_REQUESTS:
                raw_input("Press ENTER to continue...")
            visits += 1

        if q.empty():
            print "CRAWL IS FINISHED: Queue is empty"
        #if visits >= settings.MAX_CRAWLS: print "CRAWL IS FINISHED: Crawled max number of urls (%i total)" % visits

    else:
        print "Error: Unknown spider %r" % spider
        quit()
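
# A minimal sketch of how crawl_spider() might be wired up as a command-line
# entry point. The argparse usage below is an illustrative assumption, not the
# module's actual entry point; only the "train" and "test" spiders handled
# above are accepted.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Run a spider by name")
    parser.add_argument("spider", choices=["train", "test"],
                        help="which spider to crawl")
    args = parser.parse_args()
    crawl_spider(args.spider)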