Ejemplo n.º 1
0
    def __init__(self, *args, **kwargs):
        """Initialise the spider and set ``stored_last_date``.

        The date comes from ``incremental_utils.get_latest_pro_review_date``
        for this spider's ``source_id``; when that call returns a falsy
        value, 1970-01-01 is used instead.
        """
        # NOTE(review): ``self`` is also passed explicitly as the first
        # positional argument to the parent initialiser -- confirm the base
        # class expects this.
        super(Chip_deSpider, self).__init__(self, *args, **kwargs)

        latest_date = incremental_utils.get_latest_pro_review_date(
            self.mysql_manager, self.spider_conf["source_id"])
        self.stored_last_date = latest_date or datetime(1970, 1, 1)
Ejemplo n.º 2
0
    def __init__(self, *args, **kwargs):
        """Initialise the spider and build the monthly ``sitemap_follow`` list.

        ``stored_last_date`` is set to the first day of the month of the date
        returned by ``incremental_utils.get_latest_pro_review_date`` (or to
        1970-01-01 when nothing is returned). One ``urlset.<year>.<month>.xml``
        entry is then appended to ``sitemap_follow`` for every month from that
        date up to and including the current month.
        """
        # NOTE(review): ``self`` is also passed explicitly as the first
        # positional argument to the parent initialiser -- confirm the base
        # class expects this.
        super(Pcworld_enSpider, self).__init__(self, *args, **kwargs)

        latest_date = incremental_utils.get_latest_pro_review_date(
            self.mysql_manager, self.spider_conf["source_id"])
        if not latest_date:
            self.stored_last_date = datetime(1970, 1, 1)
        else:
            # Only year and month matter: truncate to the start of the month.
            self.stored_last_date = datetime(latest_date.year,
                                             latest_date.month, 1)

        start = self.stored_last_date
        today = datetime.today()
        for year in range(start.year, today.year + 1):
            # Months are 1-based here; the first and last year of the window
            # are clipped to the stored month and the current month.
            first_month = start.month if year == start.year else 1
            last_month = today.month if year == today.year else 12
            for month in range(first_month, last_month + 1):
                # ``02d`` zero-pads single-digit months (e.g. 2015.03).
                self.sitemap_follow.append(
                    'urlset.{0}.{1:02d}.xml'.format(year, month))

        # Re-run the parent initialiser so that it picks up the dynamically
        # generated sitemap_follow entries.
        super(Pcworld_enSpider, self).__init__(self, *args, **kwargs)
Ejemplo n.º 3
0
    def __init__(self, *args, **kwargs):
        """Initialise the spider and set ``stored_last_date``.

        The date comes from ``incremental_utils.get_latest_pro_review_date``
        for this spider's ``source_id``; when that call returns a falsy
        value, 1970-01-01 is used instead.
        """
        # NOTE(review): ``self`` is also passed explicitly as the first
        # positional argument to the parent initialiser -- confirm the base
        # class expects this.
        super(Ht4u_netSpider, self).__init__(self, *args, **kwargs)
        self.stored_last_date = incremental_utils.get_latest_pro_review_date(
            self.mysql_manager, self.spider_conf["source_id"])

        # A hard-coded test date (2015-02-08) used to overwrite the looked-up
        # value unconditionally, which made the lookup above pointless. It is
        # replaced by a fallback so the attribute is still never left empty.
        if not self.stored_last_date:
            self.stored_last_date = datetime(1970, 1, 1)
Ejemplo n.º 4
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider, ``stored_last_date`` and the category string.

     ``stored_last_date`` comes from
     ``incremental_utils.get_latest_pro_review_date`` for this spider's
     ``source_id`` and falls back to 1970-01-01 when that call returns a
     falsy value. ``included_categories_string`` is the space-joined form
     of ``included_categories``.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(TuExpertoComSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf['source_id'])
     self.stored_last_date = latest_date or datetime(1970, 1, 1)

     # Build the joined string once here instead of on every evaluation.
     separator = " "
     self.included_categories_string = separator.join(
         self.included_categories)
Ejemplo n.º 5
0
    def __init__(self, *args, **kwargs):
        """Initialise the spider and set ``stored_last_date``.

        When ``incremental_scraping`` is truthy the date is taken from
        ``incremental_utils.get_latest_pro_review_date`` for this spider's
        ``source_id``; otherwise it is fixed to 1970-01-01.
        """
        # NOTE(review): ``self`` is also passed explicitly as the first
        # positional argument to the parent initialiser -- confirm the base
        # class expects this.
        super(Notebookcheck_Spider, self).__init__(self, *args, **kwargs)

        if self.incremental_scraping:
            self.stored_last_date = (
                incremental_utils.get_latest_pro_review_date(
                    self.mysql_manager, self.spider_conf["source_id"]))
        else:
            self.stored_last_date = datetime(1970, 1, 1)
Ejemplo n.º 6
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider and set ``stored_last_date``.

     The date returned by ``incremental_utils.get_latest_pro_review_date``
     is truncated to the first day of its month; when nothing is returned,
     1970-01-01 is used instead.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(Pc_magazin_deSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf["source_id"])
     if not latest_date:
         self.stored_last_date = datetime(1970, 1, 1)
     else:
         # Keep only year and month; the day is pinned to the 1st.
         self.stored_last_date = datetime(latest_date.year,
                                          latest_date.month, 1)
Ejemplo n.º 7
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider and set ``stored_last_date``.

     The value is whatever ``incremental_utils.get_latest_pro_review_date``
     returns for this spider's ``source_id``; no fallback is applied.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(Thg_ruSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf["source_id"])
     self.stored_last_date = latest_date
Ejemplo n.º 8
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider and set ``stored_last_date``.

     The date comes from ``incremental_utils.get_latest_pro_review_date``
     for this spider's ``source_id``; when that call returns a falsy
     value, 1970-01-01 is used instead.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(LeMondeNumeriqueComSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf['source_id'])
     if latest_date:
         self.stored_last_date = latest_date
     else:
         self.stored_last_date = datetime(1970, 1, 1)
Ejemplo n.º 9
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider and set ``stored_last_date``.

     The value is whatever ``incremental_utils.get_latest_pro_review_date``
     returns for this spider's ``source_id``; no fallback is applied.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(Jonnyguru_comSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf["source_id"])
     self.stored_last_date = latest_date
Ejemplo n.º 10
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider, its URL date regex and ``stored_last_date``.

     ``get_date_re`` captures a ``YYYY/MM/DD`` path segment that follows
     ``pocketnow.com/`` in a URL. ``stored_last_date`` is whatever
     ``incremental_utils.get_latest_pro_review_date`` returns for this
     spider's ``source_id``; no fallback is applied.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(PocketNowSpider, self).__init__(self, *args, **kwargs)
     # Raw string: ``\d`` in a normal string literal is an invalid escape
     # sequence and triggers a warning on modern Python. The compiled
     # pattern itself is unchanged.
     self.get_date_re = re.compile(r"pocketnow.com/(\d{4}/\d{2}/\d{2})/")
     self.stored_last_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf["source_id"])
Ejemplo n.º 11
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider and set ``last_date_to_scrape``.

     The value is whatever ``incremental_utils.get_latest_pro_review_date``
     returns for this spider's ``source_id``; no fallback is applied.
     """
     # NOTE(review): ``self`` is also passed explicitly as the first
     # positional argument to the parent initialiser -- confirm the base
     # class expects this.
     super(TechRadarSpider, self).__init__(self, *args, **kwargs)

     latest_date = incremental_utils.get_latest_pro_review_date(
         self.mysql_manager, self.spider_conf["source_id"])
     self.last_date_to_scrape = latest_date