Esempio n. 1
0
def main():
    """Crawl weibo.cn user pages until the script is told to stop.

    Each pass takes one user id from the URL queue, parses that user's page
    and stores the outcome:

    * when parsing yields no info, the user id is handed back to the queue;
    * otherwise the record is saved with ``updateAlluser`` and, when
      ``info["follow"]`` exceeds 1,000,000, with ``updateFirstuser`` as well;
    * every id in the returned id collection is queued for a later pass.

    When the queue has no id to offer, a fixed seed page is crawled instead.
    An exception raised during a pass is reported on stdout and the loop
    moves on to the next pass. The queue is stopped once
    ``scriptcontrol.isContinue()`` turns false.
    """
    scriptcontrol.setStart()
    urlqueue = url_catch_queue.UrlQueue()
    urlget = parsehtml.ParseHtml()

    while scriptcontrol.isContinue():
        # Rebind on every pass so the except handler can always report the
        # id, even when geturl() itself is the call that failed (previously
        # that raised UnboundLocalError or showed the previous pass's id).
        userid = None
        try:
            userid = urlqueue.geturl()

            if userid is None:
                # Nothing queued: fall back to the seed account.
                url = "http://weibo.cn/5449728978"
            else:
                url = "http://weibo.cn/%s" % userid

            print(url)

            # Debug samples left in the original code suggest `info` is a
            # dict of strings (keys such as 'id', 'follow', 'nickname') and
            # `urllist` a collection of user-id strings -- TODO confirm
            # against ParseHtml.exetparse.
            info, urllist = urlget.exetparse(url)

            if info is None:
                # The seed page has no user id to hand back; list(None)
                # would only raise TypeError here.
                if userid is not None:
                    # NOTE(review): list(userid) splits a str id into single
                    # characters, while the loop below passes inserturl a
                    # plain id -- confirm which form inserturl expects.
                    urlqueue.inserturl(list(userid))
            else:
                urlqueue.updateAlluser(info)
                if int(info["follow"]) > 1000000:
                    urlqueue.updateFirstuser(info)

            if urllist is not None:
                for interes in urllist:
                    urlqueue.inserturl(interes)
        except Exception as exc:
            # Top-level boundary of the crawl loop: report and keep going.
            nowtime = time.strftime("%Y-%m-%d %H:%M:%S")
            print("main exception time: " + nowtime + " " + str(userid))
            print("main exception detail: " + repr(exc))

    urlqueue.stop()
Esempio n. 2
0
            else:
                urlqueue.updateAlluser(info)
                if int(info["follow"]) > 1000000:
                    urlqueue.updateFirstuser(info)

            if urllist is not None:
                for interes in urllist:
                    urlqueue.inserturl(interes)
        except Exception:
            nowtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
            exceptTimes += 1
            print("main exception time: " + nowtime + " " + str(userid));

    urlqueue.stop()

if __name__ == "__main__":
    # Command-line entry point.
    #   (no argument) or "start" -> run the crawl loop
    #   "stop"                   -> call scriptcontrol.setStop()
    #   anything else            -> report a parameter error
    scriptcontrol.setStart()
    argc = len(sys.argv)
    print("param num is : " + str(argc))

    # Only a single argument is ever interpreted as a command; with more
    # than one, `command` stays None and falls through to the error branch.
    command = sys.argv[1] if argc == 2 else None

    if argc < 2 or command == "start":
        main()
    elif command == "stop":
        scriptcontrol.setStop()
    else:
        print("param error")