def __init__(self):
    """Wire up every backing store and helper this worker relies on."""
    # Queue name for URL check tasks, taken from project settings.
    self.check_queue = SETTINGS['URLS_CHECK_TASKS']
    # Connections: Redis (task queues), MongoDB and MySQL (persistence).
    self.redis_conn = get_redis_conn()
    self.mongo_conn = MongodbClass()
    self.mysql_conn = MySqlDBClass()
    # Logging actor plus an item counter for bookkeeping.
    self.logging_actor = update_logging()
    self.counter = CountIitem()
def __init__(self):
    """Start a Chrome WebDriver session and open a Redis connection.

    NOTE: headless mode is currently disabled; the driver runs with a
    visible 500x500 window.
    """
    self.chrome_options = Options()
    # Driver binary path comes from project settings; options object is
    # created but not passed (headless was deliberately turned off).
    self.driver = webdriver.Chrome(SETTINGS['CHROME_PATH'])
    self.driver.set_window_size(500, 500)
    self.redis_conn = get_redis_conn()
    # Simple counter for work processed by this instance.
    self.count = 0
def __init__(self, refined_totalpage=2):
    """Load this spider's XPath config and connect to all datastores.

    Args:
        refined_totalpage: number of refined pages to process (default 2).
    """
    self.refined_totalpage = refined_totalpage
    self.loss_urls = []
    # XPath rules are kept in a per-spider JSON file named after self.name.
    jsonfile = os.path.join(BASE_JSONFILE_PATH, '{}.json'.format(self.name))
    self.xp = Json2XPath(jsonfile).get_xpath()
    # Connections and queues.
    self.redis_conn = get_redis_conn()
    self.check_queue = SETTINGS['URLS_CHECK_TASKS']
    self.mongo_conn = MongodbClass()
    self.mysql_conn = MySqlDBClass()
    self.logging_actor = update_logging()
    # Per-spider post-processing function looked up by spider name.
    self.func_moc = all_modify_func[self.name]
def __init__(self):
    """Set up cookie work/check queues and rotation bookkeeping."""
    self.redis_conn = get_redis_conn()
    self.cookie_w_queue = SETTINGS['COOKIE_WORK']
    self.cookie_c_queue = SETTINGS['COOKIE_CHECK']
    self.cookie_batch_size = 6
    # Ten cookie slots, indexed 0-9.
    self.cookie_indexes = list(range(10))
    self.remain_indexes = None
    self.random_indexes = None
    # Calibration timestamp and re-check interval (seconds).
    self.calib_time = datetime.today()
    self.interval = 180
def __init__(self, spider_name, check_date=None, timeout=100):
    """Prepare the update filter for one spider.

    Args:
        spider_name: key into ``all_tasks`` and the logger filename.
        check_date: optional date to check against (default None).
        timeout: kept for interface compatibility; not stored here —
            TODO(review): confirm whether it should be.
    """
    self.tasks = all_tasks[spider_name]
    self.reflesh_urls = []
    self.limitpages = 3
    self.conn = get_redis_conn()
    self.mysql_conn = MySqlDBClass()
    # Used to decide whether any dates are out of date; if so, the
    # real-time crawl window is considered valid.
    self.outdate = 0
    self.logger_filename = spider_name
    self.check_date = check_date
    self.spider_name = spider_name
    # Removed leftover debug print of the MySQL connection object; use
    # the logging facilities instead if this needs to be traced.
def __init__(self):
    """Initialize the base class, then attach Redis queues and workers."""
    super().__init__()
    self.redis_conn = get_redis_conn()
    # How many items to move per Redis batch operation.
    self.redis_batch_size = 100
    # Work and check queue names from project settings.
    self.work_queue = SETTINGS['URLS_WORK_TASKS']
    self.check_queue = SETTINGS['URLS_CHECK_TASKS']
    self.pre_suf = None
    # Private copy of the module-level workers mapping.
    self.workers = dict(workers)
def __init__(self, check_date=None, timeout=100):
    """Connect to Redis/MySQL/Mongo and record spider groupings.

    Args:
        check_date: optional date to validate against (default None).
        timeout: accepted for interface compatibility; not stored.
    """
    self.conn = get_redis_conn()
    self.mysql_conn = MySqlDBClass()
    self.mongo_conn = MongodbClass()
    # Used to decide whether any dates are out of date; if so, the
    # real-time crawl window is considered valid.
    self.outdate = 0
    self.check_date = check_date
    # Spiders that require Selenium-driven fetching.
    self.sele_spiders = ['gongshu', 'longyou', 'zhejiangzfcg']
    # Spiders covering the larger city/province sites.
    self.largecities = [
        'zhejiang', 'hangzhou', 'huzhou', 'jiaxing', 'jinhua', 'lishui',
        'ningboshi', 'quzhou', 'shaoxing', 'taizhou', 'wenzhou', 'zhoushan',
    ]
def __init__(self):
    """Open the shared Redis connection."""
    self.redis_conn = get_redis_conn()
def __init__(self):
    """Open MongoDB and Redis connections."""
    self.redis_conn = get_redis_conn()
    self.mongo_conn = MongodbClass()
def __init__(self):
    """Attach Redis, the per-spider modify function, and page limits."""
    self.redis_conn = get_redis_conn()
    self.check_queue = SETTINGS['URLS_CHECK_TASKS']
    # Post-processing function looked up by this spider's name.
    self.func_moc = all_modify_func[self.name]
    # Number of refined pages to process.
    self.refined_totalpage = 2
def _spider_opened(self, spider):
    """Scrapy signal handler: open datastore connections on spider start."""
    self.redis_conn = get_redis_conn()
    self.mongo_instance = MongodbClass()
def _spider_opened(self, spider):
    """Scrapy signal handler: open connections, ensure the target table
    exists, and set up the item counter."""
    self.mysql_instance = MySqlDBClass()
    self.mongo_instance = MongodbClass()
    # Make sure the destination table is present before any writes.
    self.mysql_instance.create_table('t_zhaobiao')
    self.redis_conn = get_redis_conn()
    self.counter = CountIitem()
def __init__(self, timeout=300):
    """Connect to Redis and record the data/error file paths.

    Args:
        timeout: accepted for interface compatibility; not stored.
    """
    self.conn = get_redis_conn()
    # Paths for crawled data and error output, from project settings.
    self.filepath = SETTINGS['CDATA_FILE_PATH']
    self.error_filepath = SETTINGS['EDATA_FILE_PATH']