Example #1
0
 def setUp(self):
     """Store the result of configureCrawler() on the instance."""
     settings = configureCrawler()
     self._GLOBAL_SETTINGS = settings
Example #2
0
# -*- coding: utf-8 -*-

from __future__ import with_statement
import os
import re
import time
from crawler.apps.categorymatcher import CategorySetterExe,CategoryValidator
from crawler.apps.scoreler import StdScoreler
from crawler.apps.scrapyer import GoogSearchScrapyer
from crawler.apps.outputwriter import Url2JsonWriter
from crawler.config.crawlersetting import configureCrawler
#import contrib.JSONStreamWriter.JSONStreamWriter as JSONStreamWriter

# Crawler settings, built once when this module is loaded. The script below
# indexes it as a two-level mapping: GLOBAL_SETTINGS[section][key].
GLOBAL_SETTINGS = configureCrawler()

if __name__ == "__main__":

    print u"main start!" 
    # Build the pipeline components and read the configured paths.
    #
    # These statements used to sit in a try/except whose handler did
    # `raise Exception`, replacing every failure with a bare, message-less
    # Exception and hiding the real cause (e.g. a KeyError naming the missing
    # settings key). Errors now propagate unchanged, so the traceback points
    # at the statement that actually failed.
    scraper = GoogSearchScrapyer()
    categorysetter = CategorySetterExe()
    scoreler = StdScoreler()
    writer = Url2JsonWriter()

    # Values read from the "subprocess" and "directory" settings sections.
    exepath = GLOBAL_SETTINGS["subprocess"]["name"]
    tmpdir = GLOBAL_SETTINGS["directory"]["transaction"]
    result_json = GLOBAL_SETTINGS["directory"]["jsonoutput"]
    no_cat_url_list = GLOBAL_SETTINGS["directory"]["noncategorizedurls"]

    i=0