def main():
    """Crawl weibo.cn user pages until the script is told to stop.

    Each pass takes a user id from the URL queue, builds that user's page
    URL, parses it and hands the results back to the queue:

    * the parsed ``info`` record is stored with ``updateAlluser``, and also
      with ``updateFirstuser`` when its ``"follow"`` value exceeds the
      threshold below;
    * every entry of the returned ``urllist`` is queued with ``inserturl``.

    When the queue yields ``None`` a fixed seed page is crawled instead.
    A pass that raises is logged and the loop carries on with the next
    pass. The queue is always stopped before the function returns.
    """
    import traceback  # local import: only needed for failure diagnostics

    seed_url = "http://weibo.cn/5449728978"
    first_user_min_follow = 1000000

    scriptcontrol.setStart()
    urlqueue = url_catch_queue.UrlQueue()
    urlget = parsehtml.ParseHtml()
    try:
        while scriptcontrol.isContinue():
            # Bound before the inner try so the handler can always log it,
            # even when geturl() itself is what raised.
            userid = None
            try:
                userid = urlqueue.geturl()
                if userid is None:
                    url = seed_url
                else:
                    url = "http://weibo.cn/%s" % userid
                print(url)

                # A sample `info` kept by the original author had the keys
                # weibos, id, follow, sex, birthday, interes, address and
                # nickname; the sample `urllist` was a set of id strings.
                info, urllist = urlget.exetparse(url)

                if info is None:
                    # Parsing produced nothing: hand the id back to the
                    # queue. The seed page has no id to re-queue, and
                    # list(None) would raise TypeError.
                    if userid is not None:
                        urlqueue.inserturl(list(userid))
                else:
                    urlqueue.updateAlluser(info)
                    if int(info["follow"]) > first_user_min_follow:
                        urlqueue.updateFirstuser(info)

                if urllist is not None:
                    for interes in urllist:
                        urlqueue.inserturl(interes)
            except Exception:
                # Top-level boundary of the crawl loop: one bad page must
                # not end the crawl, but the cause has to be visible.
                nowtime = time.strftime("%Y-%m-%d %H:%M:%S")
                print("main exception time: " + nowtime + " " + str(userid))
                traceback.print_exc()
    finally:
        # Stop the queue on every exit path, including KeyboardInterrupt.
        urlqueue.stop()
if __name__ == "__main__":
    # Command-line entry point.
    #   no argument -> run the crawler
    #   "start"     -> run the crawler
    #   "stop"      -> call scriptcontrol.setStop()
    #   anything else, or more than one argument -> report a parameter error
    scriptcontrol.setStart()
    argc = len(sys.argv)
    print("param num is : " + str(argc))

    # Only a single extra argument is interpreted as a command.
    command = sys.argv[1] if argc == 2 else None

    if argc < 2 or command == "start":
        main()
    elif command == "stop":
        scriptcontrol.setStop()
    else:
        print("param error")