Example #1
    def __init__(self, name=None, **kwargs):
        Spider.__init__(self, name)

        # MySQL connection used to persist crawl results.
        self.conn = MySQLdb.connect(
            host="localhost",
            user="******",
            passwd="123456",
            db="driving",
            charset="utf8"
        )
        self.cursor = self.conn.cursor()

        # Redis connection, used below to skip URLs that were already crawled.
        self.redispool = redis.ConnectionPool(
            host='localhost',
            port=6379,
            db=0
        )
        self.redis = redis.Redis(connection_pool=self.redispool)

        urls = self.getUrls()
        for url in urls:
            # Queue only URLs that have not been crawled yet.
            if not self.hasCrawled(url):
                self.start_urls.append(url)
                self.cacheTodo(url)
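
The snippet relies on three helpers (getUrls, hasCrawled, cacheTodo) that are not shown. A minimal sketch of how they could be backed by the MySQL and Redis connections above; the table and key names are assumptions, not from the original source:

    def getUrls(self):
        # Hypothetical: read candidate URLs from a MySQL table.
        self.cursor.execute('SELECT url FROM schools')
        return [row[0] for row in self.cursor.fetchall()]

    def hasCrawled(self, url):
        # Membership test against a Redis set of finished URLs (hypothetical key).
        return self.redis.sismember('crawled_urls', url)

    def cacheTodo(self, url):
        # Mark the URL as queued so a restart does not enqueue it twice.
        self.redis.sadd('todo_urls', url)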
Example #2
    def __init__(self, name=None, **kwargs):
        Spider.__init__(self, name, **kwargs)
        self.db = MySQLdb.connect(host="localhost",
                                  user="******",
                                  passwd="12345689",
                                  db="zhaopin",
                                  charset='utf8')
        self.cursor = self.db.cursor()
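
Neither of the first two examples closes its MySQL connection. Scrapy calls a spider's closed(reason) method when the crawl finishes, so a teardown could look like this (a sketch, not part of the original source):

    def closed(self, reason):
        # Release the MySQL handle when the spider shuts down.
        self.cursor.close()
        self.db.close()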
Example #3
    def __init__(self, **kwargs):
        Spider.__init__(self, **kwargs)

        self.config_file = kwargs.get('config_file', None)
        config = kwargs.get('config', None)
        if self.config_file:
            jconfig = jsonload(open(self.config_file))
        elif config:
            jconfig = jsonloads(config)
        else:
            self.log('config_file or config is expected', level=log.CRITICAL)
            raise Exception('config_file or config is expected')

        self.template = config_parse(jconfig)

        # Entry URL for a single page to crawl; useful for testing, or for
        # crawling one specific page in isolation.
        self.specify_url = kwargs.get('specify_url', None)
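
The config_file, config, and specify_url keyword arguments reach __init__ through Scrapy's -a command-line flag (scrapy crawl <name> -a config_file=site.json) or programmatically. A sketch, assuming the class above is named ConfigSpider (a hypothetical name):

    from scrapy.crawler import CrawlerProcess

    process = CrawlerProcess()
    # Keyword arguments passed to crawl() are forwarded to the spider's __init__.
    process.crawl(ConfigSpider, config_file='site.json')
    process.start()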
Example #4
    def __init__(self, name=None, **kwargs):
        Spider.__init__(self, name)

        self.dbpool = adbapi.ConnectionPool('MySQLdb',
            db='driving',
            user='******',
            passwd='123456',
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=False
        )

        # Codes of the four province-level municipalities (Beijing, Tianjin,
        # Shanghai, Chongqing), which are treated as special cases below.
        specialCities = [110000, 120000, 310000, 500000]
        cities = json.loads(self.jsonStr)
        for city in cities:
            if city['parent'] or (city['code'] in specialCities):
                self.start_urls.append(''.join(['http://jiaxiao.jiaxiaozhijia.com/', city['pinyin']]))
                self.city_codes[city['pinyin']] = city['code']
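
The adbapi pool runs blocking MySQLdb calls on Twisted's thread pool, which keeps the crawl's event loop responsive. A sketch of how such a pool is typically used from a parse callback; the _insert helper, the item fields, and the table name are assumptions:

    def parse(self, response):
        item = {'name': response.css('h1::text').get()}
        d = self.dbpool.runInteraction(self._insert, item)
        d.addErrback(lambda failure: self.logger.error(failure))

    def _insert(self, tx, item):
        # Runs on a pool thread; tx proxies a DictCursor for this transaction.
        tx.execute('INSERT INTO schools (name) VALUES (%s)', (item['name'],))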
Example #5

    def __init__(self):
        Spider.__init__(self)
Example #6
    def __init__(self):
        Spider.__init__(self)
        self.verificationErrors = []
        self.driver = webdriver.Firefox()
Example #7

    def __init__(self):
        Spider.__init__(self)
        self.browser = webdriver.Firefox()
        # NOTE: self.cursor is not created in this snippet; the original class
        # presumably opens a database connection elsewhere.
        self.cursor.execute(
            'create table if not exists CleaningAgents ('
            'cleaningAgentID int primary key, name varchar(20), '
            'description varchar(20), instruction varchar(20), '
            'applicationTime long, frequency long, cleaningAgentType varchar(20))'
        )
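
The Selenium-based examples never quit the Firefox driver they start. A teardown sketch using Scrapy's closed hook (not in the original source; adjust the attribute name to self.driver or self.browser as appropriate):

    def closed(self, reason):
        # Shut down the browser when the spider finishes.
        self.browser.quit()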
Example #8
    def __init__(self, config, **kwargs):
        Spider.__init__(self, **kwargs)
        self.config_file = kwargs.get('config_file')
        self.config = FocusedCrawlerConfigure(config, self.config_file).config