Esempio n. 1
0
 argv, show_help = [], False
 for op, value in opts:
     if op == "--policy":
         argv.append(value)
     if op == "--crawler_type":
         try:
             value = int(value)
             crawl_policy.CRAWLER_TYPE = int(value)
             argv.append(value)
         except Exception, e:
             print e.message
             exit(1)
     if op == "--crawler_number":
         try:
             value = int(value)
             crawl_policy.CRAWLER_NUMBER = value
             argv.append(value)
         except Exception, e:
             print e.message
             exit(1)
     if op == "--filter_region":
         crawl_policy.START_FILTER[ORIGIN_REGION] = unicode(value)
         argv.append(value)
     if op == "--filter_url":
         crawl_policy.START_FILTER[URL] = value
         argv.append(value)
     if op == "--apply_time_interval":
         crawl_policy.APPLY_TIME_INTERVAL = True
         argv.append(value)
     if op == "--time_st":
         try:
Esempio n. 2
0
                            break
                else:
                    break
            except KeyboardInterrupt, e:
                print e.message
                break

        # Cache every URL that has been crawled
        url_pool.URLPool().save()


if __name__ == "__main__":
    # Manual smoke test: run a single crawl against one hard-coded seed page.
    # The field-name constants used as dictionary keys below (ORIGIN_REGION,
    # URL, ANNOUNCE_TYPE, PURCHASE_TYPE, NOTE) are not defined in this block;
    # presumably they come from the star import -- confirm against util.
    from util import *
    from policy import Policy

    # Start from a default policy and override the crawler count to 1.
    test_policy = Policy()
    test_policy.CRAWLER_NUMBER = 1

    # The one seed record describing the page to crawl.
    seed_record = {
        ORIGIN_REGION: u"广西",
        URL: "http://www.gxgp.gov.cn/zbxjcg/index.htm",
        ANNOUNCE_TYPE: u"中标公告",
        PURCHASE_TYPE: u"询价采购",
        NOTE: u"广西壮族自治区政府采购中心-询价采购"
    }

    # The runner receives the seed records as a list through ``msg``.
    BidRunner("test", msg=[seed_record], policy=test_policy).run()