import glob
import time

import crawling_config
# FileQueue, getLinkFile, fileExists and fetchLink are defined elsewhere
# in the crawler; a hypothetical sketch of them follows main() below.


def main():
    try:
        while True:
            # get the file queue for this run (this locks the queue until we finish)
            feedQueue = FileQueue(crawling_config.DIR_QUEUE, crawling_config.DIR_LOCKS, 'r')
            links = feedQueue.getItems()

            # go through the links, fetching any we have not seen before
            for linkRaw in links:
                linkParts = linkRaw.split('|')
                if len(linkParts) != 2:
                    # invalid entry, skip it
                    continue
                link, section = linkParts
                linkFile = getLinkFile(link)
                if not fileExists(linkFile):
                    # the link has not been fetched yet, do it now
                    crawling_config.DEBUG('Fetching %s.' % link)
                    fetchLink(link, linkFile, section)

            # this queue file is fully processed, remove it
            feedQueue.delete()

            # count the pending queue files; do not sleep if there are 3+ waiting
            queueFiles = glob.glob(crawling_config.DIR_QUEUE + '*')
            if len(queueFiles) < 3:
                crawling_config.DEBUG('Sleeping for %d seconds' % crawling_config.SLEEP_TIME)
                time.sleep(crawling_config.SLEEP_TIME)
            else:
                print('There are %d queue files, continuing immediately!' % len(queueFiles))
    except KeyboardInterrupt:
        # allow a clean Ctrl-C exit; without this the farewell below is unreachable
        print('Bye bye')


if __name__ == '__main__':
    main()
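The helpers main() leans on (FileQueue, getLinkFile, fileExists, fetchLink) and the crawling_config module are defined elsewhere in the crawler and do not appear in this excerpt. Below is a minimal, hypothetical sketch of what they might look like, standard library only, so the loop above can be exercised end to end; the hashed file names, the missing lock handling, and the fetch logic are all assumptions, not the project's real implementations.

import hashlib
import os
import urllib.request

# Assumed shape of crawling_config (normally its own module):
#   DIR_QUEUE = 'queue/'; DIR_LOCKS = 'locks/'; SLEEP_TIME = 60
#   def DEBUG(msg): print(msg)


class FileQueue(object):
    """Hypothetical file-backed queue: opens the oldest queue file and can
    delete it once processed. Real lock handling (DIR_LOCKS) is omitted."""

    def __init__(self, queue_dir, lock_dir, mode):
        # lock_dir and mode are accepted for parity with main() but unused here
        files = sorted(os.listdir(queue_dir))
        self.path = os.path.join(queue_dir, files[0]) if files else None

    def getItems(self):
        # one 'url|section' entry per line
        if self.path is None:
            return []
        with open(self.path) as f:
            return [line.strip() for line in f if line.strip()]

    def delete(self):
        # remove the queue file once every link in it has been handled
        if self.path is not None:
            os.remove(self.path)


def getLinkFile(link):
    # one file per URL, named by a hash so the path stays filesystem-safe
    os.makedirs('fetched', exist_ok=True)
    return os.path.join('fetched', hashlib.md5(link.encode()).hexdigest())


def fileExists(path):
    return os.path.exists(path)


def fetchLink(link, linkFile, section):
    # download the page and store it; section is kept only for signature parity
    data = urllib.request.urlopen(link).read()
    with open(linkFile, 'wb') as f:
        f.write(data)

Note that the queue file is deleted only after the whole batch has been processed, so a crash mid-batch simply replays the same file on restart; the fileExists check in main() then skips anything already on disk, which keeps the retry cheap.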