Example #1
0
    def run(self, _input):
        """Scrape every day from ``first_date`` through ``last_date``, inclusive.

        Reads ``first_date``, ``last_date`` and ``deduplicate`` from
        ``self.options``. When ``last_date`` is falsy it is replaced by
        today's date, and that value is written back into ``self.options``.
        One scraper is built per day via ``self.get_scraper`` and all of
        them are handed to a fresh ``RobustController`` in a single
        ``scrape`` call.

        If ``last_date`` lies before ``first_date`` the day range is empty
        and ``scrape`` is called with an empty list.

        :param _input: accepted but not used by this method.
        :return: None.
        """
        if not self.options['last_date']:
            self.options['last_date'] = date.today()

        # bool() instead of ``value and True``: the latter hands falsy
        # non-bool values (None, '', 0) to the controller unchanged.
        deduplicate = bool(self.options['deduplicate'])

        first_date = self.options['first_date']
        n_days = (self.options['last_date'] - first_date).days
        # +1 so that last_date itself is included in the range.
        scrapers = [
            self.get_scraper(first_date + timedelta(days=offset))
            for offset in range(n_days + 1)
        ]
        RobustController().scrape(scrapers, deduplicate=deduplicate)
Example #2
0
    def run(self, _dummy=None):
        """Import articles from ``self.options['file']`` and record provenance.

        Logs the import, creates a ``RobustController`` for
        ``self.articleset`` (kept on ``self.controller``), scrapes ``self``
        with it and passes the resulting list to ``self.postprocess``. The
        text returned by ``self.get_provenance`` is then placed in front of
        any existing article set provenance, separated by a newline, and the
        article set is saved.

        :param _dummy: accepted but not used by this method.
        :return: the list of scraped articles.
        :raises Exception: if scraping yields no articles.
        """
        source_file = self.options['file']
        template = u"Importing {self.__class__.__name__} from {file.name} into {self.project}"
        log.info(template.format(self=self, file=source_file))

        from amcat.scraping.controller import RobustController
        self.controller = RobustController(self.articleset)

        imported = list(self.controller.scrape(self))
        if not imported:
            raise Exception("No atricles were imported")
        self.postprocess(imported)

        # Read the existing provenance before computing the new entry, so
        # the newest entry ends up first in the joined text.
        previous = self.articleset.provenance
        entries = [self.get_provenance(source_file, imported)]
        if previous is not None:
            entries.append(previous)
        self.articleset.provenance = "\n".join(entries)
        self.articleset.save()

        return imported
Example #3
0
 def run(self, input=None, deduplicate=False):
     """Scrape this object into ``self.articleset`` using a RobustController.

     Logs the scraper class, project and medium, then wraps ``self`` in a
     one-element list and passes it to ``RobustController.scrape``.

     :param input: accepted but not used by this method.
     :param deduplicate: forwarded to ``scrape`` as its second positional
         argument.
     :return: whatever ``scrape`` returns.
     """
     template = "Scraping {self.__class__.__name__} into {self.project}, medium {self.medium} using RobustController"
     log.info(template.format(self=self))

     from amcat.scraping.controller import RobustController
     controller = RobustController(self.articleset)
     return controller.scrape([self], deduplicate)
Example #4
0
 def run(self, _input=None):
     """Build the scraper for the configured date and run it.

     Calls ``get_scraper(date=...)`` on ``self.options["scraper"]`` with
     ``self.options["date"]``, then passes the result to the ``scrape``
     method of a new ``RobustController``.

     :param _input: accepted but not used by this method.
     :return: None.
     """
     scraper_source = self.options["scraper"]
     target = scraper_source.get_scraper(date=self.options["date"])
     # The scraper is built before the controller is created.
     RobustController().scrape(target)