def setUp(self):
    """Load the crawler settings and keep them on the instance.

    Stores the value returned by ``configureCrawler()`` in
    ``self._GLOBAL_SETTINGS``.
    """
    # The earlier file-based loader,
    # CrawlerConfig("./crawler/config/crawler.config"), is disabled;
    # settings come from configureCrawler() instead.
    self._GLOBAL_SETTINGS = configureCrawler()
# -*- coding: utf-8 -*-
# Crawler entry script.
#
# At import time the crawler settings are loaded into GLOBAL_SETTINGS.  When
# run as a program, the script instantiates the scraper, category setter,
# scoreler and writer objects and reads the paths it needs from the settings.
from __future__ import with_statement

import os
import re
import time

from crawler.apps.categorymatcher import CategorySetterExe, CategoryValidator
from crawler.apps.scoreler import StdScoreler
from crawler.apps.scrapyer import GoogSearchScrapyer
from crawler.apps.outputwriter import Url2JsonWriter
from crawler.config.crawlersetting import configureCrawler
#import contrib.JSONStreamWriter.JSONStreamWriter as JSONStreamWriter

# Value returned by configureCrawler(); indexed below as
# GLOBAL_SETTINGS[section][key].
GLOBAL_SETTINGS = configureCrawler()

if __name__ == "__main__":
    # The parenthesised form prints the same text under the Python 2 print
    # statement and also parses as a function call on Python 3.
    print(u"main start!")
    try:
        # Worker objects, each built with its default constructor.
        scraper = GoogSearchScrapyer()
        categorysetter = CategorySetterExe()
        scoreler = StdScoreler()
        writer = Url2JsonWriter()

        # Values looked up from the loaded settings.
        exepath = GLOBAL_SETTINGS["subprocess"]["name"]
        tmpdir = GLOBAL_SETTINGS["directory"]["transaction"]
        result_json = GLOBAL_SETTINGS["directory"]["jsonoutput"]
        no_cat_url_list = GLOBAL_SETTINGS["directory"]["noncategorizedurls"]
    except Exception:
        # Propagate the original error unchanged.  Raising a fresh, empty
        # Exception here would discard the real type, message and traceback
        # (for example, which settings key was missing).
        raise

    # NOTE(review): initialised to zero; its use is not visible in this part
    # of the file - presumably a counter for later processing, confirm.
    i = 0