def _respawn_dead_threads(threads, make_thread):
    """Replace every finished thread in *threads* with a newly started one.

    Dead threads are swapped out in place (same index), so the list never
    changes length while it is being iterated and no entry is skipped.

    Args:
        threads: Mutable list of thread objects exposing ``isAlive()`` and
            ``clear()``; it is modified in place.
        make_thread: Zero-argument callable returning a new, not yet
            started thread object.

    Returns:
        The number of threads that were replaced.
    """
    respawned = 0
    for idx, thr in enumerate(threads):
        if thr.isAlive():
            continue
        # NOTE(review): clear() is defined on the project's thread class;
        # presumably it releases per-thread resources -- confirm.
        thr.clear()
        replacement = make_thread()
        threads[idx] = replacement
        replacement.start()
        respawned += 1
    return respawned


def main():
    """Log in to Weibo, queue all uncrawled users and run the download threads.

    Steps:
      1. Read the configuration and log in; exit if the login fails.
      2. Create the database connection pool and load every row of the
         ``users`` table whose ``isCrawled`` flag is 0 into a queue.
      3. Start ``threadnum`` download threads that share the pool, the
         queue and a lock.
      4. Once a minute, stop if the queue is empty; otherwise replace any
         thread that has finished.

    On an error or Ctrl-C inside the monitoring loop, the exception type is
    printed, ``end()`` is called on every thread in the module-level
    ``threadPool`` list, and the loop stops.
    """
    # Log in to Weibo.
    paramDict = read_config()
    if not login(paramDict['username'], paramDict['password']):
        exit()

    # Parse the thread count once instead of at every use.
    threadNum = int(paramDict['threadnum'])

    # Set up the database connection pool.
    pool = PooledDB(MySQLdb, threadNum,
                    host=paramDict['dbhost'],
                    user=paramDict['dbuser'],
                    passwd=paramDict['dbpasswd'],
                    db=paramDict['dbname'])

    # Read the list of links that have not been crawled yet.
    urlQLock = threading.Lock()
    tableName = 'users'
    sql = 'select id, uid from %s where isCrawled = 0' % tableName
    conn = pool.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            result = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Hand the connection back to the pool rather than holding it for
        # the lifetime of the main thread.
        conn.close()

    # Put the pending entries on the queue.
    urlQ = Queue(len(result))
    for entry in result:
        urlQ.put(entry)

    def make_thread():
        return DownloadThread(pool, urlQ, urlQLock)

    # Create the worker threads.
    for _ in range(threadNum):
        thr = make_thread()
        threadPool.append(thr)
        thr.start()

    # Check periodically for finished threads and start replacements.
    while True:
        try:
            sleep(60)
            # Leave the loop once the queue is empty.
            if not urlQ.qsize():
                break
            # "+ 1" accounts for the main thread, which activeCount() also
            # counts; a lower number means at least one worker has died.
            if threading.activeCount() < threadNum + 1:
                _respawn_dead_threads(threadPool, make_thread)
        except (KeyboardInterrupt, Exception):
            # Best-effort shutdown: report the exception type, ask every
            # thread to stop, then leave the monitoring loop.
            print(sys.exc_info()[0])
            for thr in threadPool:
                thr.end()
            break
    print('Main thread end!')