def __init__(self, name=None, **kwargs):
    """Connect to MySQL and Redis, then seed start_urls with uncrawled URLs.

    URLs come from self.getUrls(); ones already marked crawled (per
    self.hasCrawled, backed by Redis) are skipped, the rest are queued
    and cached as to-do via self.cacheTodo.
    """
    Spider.__init__(self, name)
    self.conn = MySQLdb.connect(
        host="localhost",
        user="******",
        passwd="123456",
        db="driving",
        charset="utf8",
    )
    self.cursor = self.conn.cursor()
    self.redispool = redis.ConnectionPool(host='localhost', port=6379, db=0)
    self.redis = redis.Redis(connection_pool=self.redispool)
    for url in self.getUrls():
        # PEP 8: truth-test instead of `== False` (assumes hasCrawled
        # returns a plain bool — TODO confirm it never returns None).
        if not self.hasCrawled(url):
            self.start_urls.append(url)
            self.cacheTodo(url)
def __init__(self, name=None, **kwargs):
    """Initialize the spider and open a cursor on the `zhaopin` MySQL database."""
    Spider.__init__(self, name, **kwargs)
    connection = MySQLdb.connect(
        host="localhost",
        user="******",
        passwd="12345689",
        db="zhaopin",
        charset='utf8',
    )
    self.db = connection
    self.cursor = connection.cursor()
def __init__(self, **kwargs):
    """Build the crawl template from a JSON config.

    Accepts either `config_file` (path to a JSON file) or `config`
    (a JSON string) in kwargs; one of the two is required. Raises
    Exception after logging at CRITICAL when neither is given.
    An optional `specify_url` kwarg overrides the entry URL.
    """
    Spider.__init__(self, **kwargs)
    self.config_file = kwargs.get('config_file', None)
    config = kwargs.get('config', None)
    if self.config_file:
        # Fix: close the config file after reading — the original
        # `jsonload(open(...))` leaked the file handle.
        with open(self.config_file) as fh:
            jconfig = jsonload(fh)
    elif config:
        jconfig = jsonloads(config)
    else:
        self.log('config_file or config is expected', level=log.CRITICAL)
        raise Exception('config_file or config is expected')
    self.template = config_parse(jconfig)
    # Single entry URL to crawl; useful for testing or crawling one page only.
    self.specify_url = kwargs.get('specify_url', None)
def __init__(self, name=None, **kwargs):
    """Set up the async MySQL pool and seed start_urls from the city list.

    Cities are parsed from self.jsonStr; a city is queued when it has a
    parent or its code is in the special list (presumably the four
    direct-administered municipalities — verify against the data).
    """
    Spider.__init__(self, name)
    self.dbpool = adbapi.ConnectionPool(
        'MySQLdb',
        db='driving',
        user='******',
        passwd='123456',
        cursorclass=MySQLdb.cursors.DictCursor,
        charset='utf8',
        use_unicode=False,
    )
    municipality_codes = [110000, 120000, 310000, 500000]
    for city in json.loads(self.jsonStr):
        if city['parent'] or (city['code'] in municipality_codes):
            pinyin = city['pinyin']
            self.start_urls.append('http://jiaxiao.jiaxiaozhijia.com/' + pinyin)
            self.city_codes[pinyin] = city['code']
def __init__(self):
    # Delegate straight to the base Spider initializer; no extra state here.
    Spider.__init__(self)
def __init__(self):
    """Launch a Firefox WebDriver session and reset the verification-error log."""
    Spider.__init__(self)
    self.driver = webdriver.Firefox()
    self.verificationErrors = []
def __init__(self):
    # Base spider init, then launch a Firefox session for scraping.
    Spider.__init__(self)
    self.browser = webdriver.Firefox()
    # NOTE(review): self.cursor is never assigned in this initializer, so the
    # execute() below raises AttributeError unless a base class sets it — verify.
    # NOTE(review): "application Time" in the DDL contains a space; looks like a
    # single column name (e.g. applicationTime) was intended — confirm the schema.
    self.cursor.execute(
        'create table if not exists CleaningAgents (cleaningAgentID int primary key, name varchar(20),description varchar(20),instruction varchar(20),application Time long,frequency long,cleaningAgentType varchar(20))'
    )
def __init__(self, config, **kwargs):
    """Initialize the spider and resolve the focused-crawler configuration."""
    Spider.__init__(self, **kwargs)
    self.config_file = kwargs.get('config_file')
    configure = FocusedCrawlerConfigure(config, self.config_file)
    self.config = configure.config
def __init__(self):
    # Base spider init, then launch a Firefox session for scraping.
    Spider.__init__(self)
    self.browser = webdriver.Firefox()
    # NOTE(review): self.cursor is never assigned in this initializer, so the
    # execute() below raises AttributeError unless a base class sets it — verify.
    # NOTE(review): "application Time" in the DDL contains a space; looks like a
    # single column name (e.g. applicationTime) was intended — confirm the schema.
    self.cursor.execute('create table if not exists CleaningAgents (cleaningAgentID int primary key, name varchar(20),description varchar(20),instruction varchar(20),application Time long,frequency long,cleaningAgentType varchar(20))')