def run(self, _input):
    """Scrape one scraper per day over [first_date, last_date], inclusive.

    Defaults ``last_date`` to today when unset (mutating the options dict),
    builds a scraper for each day via :meth:`get_scraper`, and hands the
    whole batch to a ``RobustController`` in one call.
    """
    if not self.options['last_date']:
        # Open-ended ranges run up to and including today.
        self.options['last_date'] = date.today()
    # bool() instead of the original `x and True`: that idiom returned the
    # raw falsy value (e.g. None) rather than False when dedup was off.
    dedu = bool(self.options['deduplicate'])
    n_days = (self.options['last_date'] - self.options['first_date']).days
    # n_days + 1 so that both endpoints of the range are scraped.
    days = [self.options['first_date'] + timedelta(days=x)
            for x in range(n_days + 1)]
    scrapers = [self.get_scraper(d) for d in days]
    RobustController().scrape(scrapers, deduplicate=dedu)
def run(self, _dummy=None):
    """Import articles from the configured file into the project.

    Scrapes via a ``RobustController``, post-processes the results, and
    prepends a provenance line to the article set before saving it.

    Returns:
        The list of scraped articles.

    Raises:
        Exception: if the scrape yields no articles.
    """
    # Renamed from `file` to avoid shadowing the builtin; the format
    # string below is updated to match, so the logged text is unchanged.
    input_file = self.options['file']
    log.info(
        u"Importing {self.__class__.__name__} from {input_file.name} into {self.project}"
        .format(**locals()))
    from amcat.scraping.controller import RobustController
    self.controller = RobustController(self.articleset)
    arts = list(self.controller.scrape(self))
    if not arts:
        # Fixed typo in the original message: "atricles" -> "articles".
        raise Exception("No articles were imported")
    self.postprocess(arts)
    # Prepend the new provenance entry to any existing provenance text.
    old_provenance = ([] if self.articleset.provenance is None
                      else [self.articleset.provenance])
    new_provenance = self.get_provenance(input_file, arts)
    self.articleset.provenance = "\n".join([new_provenance] + old_provenance)
    self.articleset.save()
    return arts
def run(self, input=None, deduplicate=False):
    """Scrape this single scraper through a RobustController.

    Returns whatever the controller's scrape of ``[self]`` yields.
    """
    # NOTE: `input` shadows the builtin, but it is part of the public
    # signature, so the name is kept for caller compatibility.
    log.info(
        "Scraping {self.__class__.__name__} into {self.project}, "
        "medium {self.medium} using RobustController".format(**locals()))
    from amcat.scraping.controller import RobustController
    controller = RobustController(self.articleset)
    return controller.scrape([self], deduplicate)
def run(self, _input=None):
    """Build a scraper for the configured date and run it robustly."""
    scraper_form = self.options["scraper"]
    daily_scraper = scraper_form.get_scraper(date=self.options["date"])
    RobustController().scrape(daily_scraper)