コード例 #1
0
def crawlresourceItems(resource, IDlist, categoryname):
    """Fetch the not-yet-crawled news items of one category and store them.

    The target directory is ``IO.itemsPath/<resource.name>/<categoryname>/``.
    Ids that already have a file in that directory are skipped; every
    remaining id is appended to ``resource.rootlink_item`` to form the item
    URL, fetched through ``getnewsitem`` and, when something was extracted,
    tagged with the category and written via ``extraction.toDisc``.

    Args:
        resource: object exposing ``name`` and ``rootlink_item``.
        IDlist: iterable of news ids (anything ``str()`` can render).
        categoryname: category label, also used as the directory name.

    Returns:
        None.
    """
    path = IO.ensure_dir(
        os.sep.join([IO.itemsPath, resource.name, categoryname]) + os.sep)
    rootlink_news = resource.rootlink_item

    # Names (extension stripped) of the items already stored for this
    # resource/category; a set keeps each membership test O(1).
    crawled_ids = set(IO.getfilenames_of_dir(path, removeextension=True))

    # Drop ids that were crawled before. File names come back from disk as
    # text, so the string form of the id is checked as well -- otherwise
    # numeric ids would never match and would be downloaded again.
    pending_ids = [newsid for newsid in IDlist
                   if newsid not in crawled_ids
                   and str(newsid) not in crawled_ids]

    for newsid in pending_ids:
        newslink = rootlink_news + str(newsid)
        if resource.name == "vakit":
            # Item links of the "vakit" resource get a trailing slash.
            newslink += "/"
        print(newslink)
        extraction = getnewsitem(resource, newslink, newsid)
        if not extraction:
            # Nothing extracted for this link; move on to the next id.
            continue
        extraction.setcategory(categoryname)
        # Random 3-9 second pause between successful fetches.
        time.sleep(random.randint(3, 9))
        extraction.toDisc(path)