def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    Reads the most recent pro-review date already stored for this
    source; falls back to the Unix epoch when nothing is stored yet so
    a first run scrapes everything.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(Chip_deSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
    if not self.stored_last_date:
        self.stored_last_date = datetime(1970, 1, 1)
def __init__(self, *args, **kwargs):
    """Initialize the spider and build the monthly sitemap follow list.

    Determines the month of the latest stored pro-review (epoch when
    none) and appends one ``urlset.YYYY.MM.xml`` entry per month from
    that month up to the current month, then re-runs the parent
    constructor so it picks up the dynamically extended
    ``sitemap_follow`` list.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original calls forwarded
    # the spider instance as the parent's first positional argument.
    super(Pcworld_enSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
    if self.stored_last_date:
        # Truncate to the first of the month so the whole month is
        # re-scraped (reviews within the cutoff month may be missing).
        self.stored_last_date = datetime(self.stored_last_date.year,
                                         self.stored_last_date.month, 1)
    else:
        self.stored_last_date = datetime(1970, 1, 1)
    current_date = datetime.today()
    for year in range(self.stored_last_date.year, current_date.year + 1):
        # months_span holds 0-based month indices (0 = January).
        months_span = range(12)
        if year == self.stored_last_date.year:
            # First year: start at the cutoff month.
            months_span = months_span[self.stored_last_date.month - 1:]
        if year == current_date.year and current_date.month != 12:
            # Last year: stop at the current month (negative-index slice
            # trims the trailing months; a no-op when month == 12).
            months_span = months_span[:current_date.month - 12]
        for month in months_span:
            # {1:02d} zero-pads single-digit months (e.g. urlset.2016.03.xml).
            self.sitemap_follow.append(
                'urlset.{0}.{1:02d}.xml'.format(year, month + 1))
    # Reinit the spider in order to reload the dynamically generated
    # sitemap_follow.
    super(Pcworld_enSpider, self).__init__(*args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    Reads the most recent pro-review date already stored for this
    source; falls back to the Unix epoch when nothing is stored yet so
    a first run scrapes everything.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(Ht4u_netSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
    # NOTE(review): removed a leftover debug override that hard-coded
    # stored_last_date to datetime(2015, 2, 8), discarding the value
    # just fetched from MySQL; added the epoch fallback used by the
    # sibling spiders instead.
    if not self.stored_last_date:
        self.stored_last_date = datetime(1970, 1, 1)
def __init__(self, *args, **kwargs):
    """Initialize the spider, load the incremental-scrape cutoff and
    precompute the category filter string.

    Falls back to the Unix epoch when no pro-review date is stored yet
    so a first run scrapes everything.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(TuExpertoComSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf['source_id'])
    if not self.stored_last_date:
        self.stored_last_date = datetime(1970, 1, 1)
    # Joins the string once so we don't do this for every evaluation.
    self.included_categories_string = " ".join(self.included_categories)
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    When incremental scraping is disabled the cutoff is forced to the
    Unix epoch (scrape everything) and the MySQL lookup is skipped.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(Notebookcheck_Spider, self).__init__(*args, **kwargs)
    if not self.incremental_scraping:
        # Full scrape requested: no need to query the stored date.
        self.stored_last_date = datetime(1970, 1, 1)
        return
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    Truncates the stored cutoff to the first day of its month (so the
    whole cutoff month is re-scraped); falls back to the Unix epoch
    when no date is stored yet.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(Pc_magazin_deSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
    if self.stored_last_date:
        self.stored_last_date = datetime(self.stored_last_date.year,
                                         self.stored_last_date.month, 1)
    else:
        self.stored_last_date = datetime(1970, 1, 1)
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    NOTE(review): unlike most sibling spiders this one has no epoch
    fallback, so ``stored_last_date`` may be None on a first run —
    confirm downstream code handles that.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(Thg_ruSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    Reads the most recent pro-review date already stored for this
    source; falls back to the Unix epoch when nothing is stored yet so
    a first run scrapes everything.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(LeMondeNumeriqueComSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf['source_id'])
    if not self.stored_last_date:
        self.stored_last_date = datetime(1970, 1, 1)
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    NOTE(review): no epoch fallback here — ``stored_last_date`` may be
    None on a first run; confirm downstream code handles that.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    # (Also removed a commented-out debug print.)
    super(Jonnyguru_comSpider, self).__init__(*args, **kwargs)
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
def __init__(self, *args, **kwargs):
    """Initialize the spider, compile the URL-date regex and load the
    incremental-scrape cutoff.

    The regex captures the YYYY/MM/DD path segment from pocketnow.com
    article URLs.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(PocketNowSpider, self).__init__(*args, **kwargs)
    # Raw string so \d is a regex escape, not a (deprecated) string escape.
    self.get_date_re = re.compile(r"pocketnow.com/(\d{4}/\d{2}/\d{2})/")
    self.stored_last_date = incremental_utils.get_latest_pro_review_date(
        self.mysql_manager, self.spider_conf["source_id"])
def __init__(self, *args, **kwargs):
    """Initialize the spider and load the incremental-scrape cutoff.

    NOTE(review): this spider stores the cutoff under
    ``last_date_to_scrape`` rather than the ``stored_last_date`` name
    used by sibling spiders, and has no epoch fallback — it may be None
    on a first run; confirm downstream code handles both points.
    """
    # BUG FIX: do not pass `self` explicitly through super() — the bound
    # super object already supplies it, so the original call forwarded
    # the spider instance as the parent's first positional argument.
    super(TechRadarSpider, self).__init__(*args, **kwargs)
    self.last_date_to_scrape = \
        incremental_utils.get_latest_pro_review_date(
            self.mysql_manager, self.spider_conf["source_id"])