# NOTE(review): this chunk arrived whitespace-mangled (collapsed onto one line);
# reformatted for readability, tokens unchanged. Python 2 syntax throughout.
# Parses getopt-style options from `opts` (defined outside this view) into the
# module-level `crawl_policy` configuration, echoing each consumed value into
# `argv`. `show_help` is initialized here but not used within the visible span.
argv, show_help = [], False
for op, value in opts:
    if op == "--policy":
        argv.append(value)
    if op == "--crawler_type":
        # crawler type must be an integer; abort the process on bad input
        try:
            value = int(value)
            crawl_policy.CRAWLER_TYPE = int(value)
            argv.append(value)
        except Exception, e:
            print e.message
            exit(1)
    if op == "--crawler_number":
        # number of crawler workers; abort the process on bad input
        try:
            value = int(value)
            crawl_policy.CRAWLER_NUMBER = value
            argv.append(value)
        except Exception, e:
            print e.message
            exit(1)
    if op == "--filter_region":
        # region filter stored as a unicode string (Python 2)
        crawl_policy.START_FILTER[ORIGIN_REGION] = unicode(value)
        argv.append(value)
    if op == "--filter_url":
        crawl_policy.START_FILTER[URL] = value
        argv.append(value)
    if op == "--apply_time_interval":
        # flag option: enables time-interval filtering; value is still echoed
        crawl_policy.APPLY_TIME_INTERVAL = True
        argv.append(value)
    if op == "--time_st":
        try:
            # NOTE(review): SOURCE is truncated here -- the body of this try
            # (presumably parsing the start-time value) continues outside the
            # visible chunk; confirm against the full file.
# NOTE(review): chunk resumes mid-construct; the loop and `try` that the leading
# `break` statements and the `except` below belong to are outside the visible
# source, so the indentation here is a best-effort reconstruction -- confirm
# against the original file. Tokens are unchanged from SOURCE.
            break
        else:
            break
    except KeyboardInterrupt, e:
        # manual interruption (Ctrl-C) stops the crawl loop after printing
        print e.message
        break
    # cache all crawled urls
    url_pool.URLPool().save()


if __name__ == "__main__":
    # Manual test entry point: run the crawler once against a single fixed
    # procurement-announcement listing page with a one-crawler policy.
    from util import *
    from policy import Policy
    policy = Policy()
    policy.CRAWLER_NUMBER = 1
    # One message describing the page to crawl. The keys (ORIGIN_REGION, URL,
    # ANNOUNCE_TYPE, PURCHASE_TYPE, NOTE) presumably come from util's star
    # import -- TODO confirm. Values are kept byte-identical (runtime data).
    msg = [{
        ORIGIN_REGION: u"广西",
        URL: "http://www.gxgp.gov.cn/zbxjcg/index.htm",
        ANNOUNCE_TYPE: u"中标公告",
        PURCHASE_TYPE: u"询价采购",
        NOTE: u"广西壮族自治区政府采购中心-询价采购"
    }]
    runner = BidRunner("test", msg=msg, policy=policy)
    runner.run()
    pass