def scrapers_openspendingdoc(document_id):
    """Run the OpenSpending document scraper for a single document.

    The configuration is loaded, Elasticsearch is set up, and the
    configuration together with ``document_id`` is handed to
    ``OpenSpendingDocumentScraperRunner().run`` as keyword arguments.

    :param document_id: Identifier passed through unchanged to the runner.
    """
    settings = load_config()
    # The returned client is not used below; the call itself is kept as-is
    # (presumably for its setup side effects -- confirm).
    _es_client = setup_elasticsearch(settings)

    runner = OpenSpendingDocumentScraperRunner()
    runner.run(config=settings, document_id=document_id)
def scrapers_openspending(date_from, date_to):
    """Run the OpenSpending scraper for a date range.

    Both bounds are converted to text before they reach the runner:
    ``isoformat()`` is used when the value provides it, ``str()`` otherwise.

    :param date_from: Lower bound of the range.
    :param date_to: Upper bound of the range.
    """
    def _as_text(value):
        # Values without an isoformat() method fall back to plain str().
        try:
            return value.isoformat()
        except AttributeError:
            return str(value)

    settings = load_config()
    # The returned client is not used below; the call itself is kept as-is
    # (presumably for its setup side effects -- confirm).
    _es_client = setup_elasticsearch(settings)

    start_text = _as_text(date_from)
    end_text = _as_text(date_to)

    runner = OpenSpendingScraperRunner()
    runner.run(config=settings, date_from=start_text, date_to=end_text)
def scrapers_poliflw(date_from, date_to, scroll):
    """Run the Poliflw scraper for a date range.

    Both bounds are converted to text before they reach the runner:
    ``isoformat()`` is used when the value provides it, ``str()`` otherwise.
    ``scroll`` is forwarded untouched.

    :param date_from: Lower bound of the range.
    :param date_to: Upper bound of the range.
    :param scroll: Passed through unchanged to the runner.
    """
    def _as_text(value):
        # Values without an isoformat() method fall back to plain str().
        try:
            return value.isoformat()
        except AttributeError:
            return str(value)

    settings = load_config()
    # The returned client is not used below; the call itself is kept as-is
    # (presumably for its setup side effects -- confirm).
    _es_client = setup_elasticsearch(settings)

    start_text = _as_text(date_from)
    end_text = _as_text(date_to)

    runner = PoliflwScraperRunner()
    runner.run(
        config=settings,
        date_from=start_text,
        date_to=end_text,
        scroll=scroll
    )
def run(self):
    """Run every scraper, aggregate the results and bulk-index them.

    Each entry of ``self.scrapers`` is instantiated, given an empty
    ``items`` list and run; whatever it leaves in ``items`` is collected.
    Any exception raised while doing so is logged and re-raised. The
    collected items are passed to ``self.aggregate`` and every resulting
    location gets ``_id`` (copied from its ``id``) and ``_index`` set
    before being sent to Elasticsearch via ``bulk``.
    """
    collected = []
    for scraper_factory in self.scrapers:
        scraper = scraper_factory()
        try:
            scraper.items = []
            scraper.run()
            collected.extend(scraper.items)
        except Exception as e:
            logging.error(e)
            raise e

    item_count = len(collected)
    logging.info('Fetching resulted in %s items ...' % (item_count,))

    locations = self.aggregate(collected)
    es = setup_elasticsearch()
    # Add the bulk metadata fields to each location in place.
    for location in locations:
        location['_id'] = location['id']
        location['_index'] = 'jodal_locations'
    bulk(es, locations, False)
def scrapers_obv_counts(date_from, date_to, threshold, organizations):
    """Run the Openbesluitvorming counts scraper for a date range.

    Both bounds are converted to text before they reach the runner:
    ``isoformat()`` is used when the value provides it, ``str()`` otherwise.
    ``threshold`` and ``organizations`` are forwarded untouched.

    :param date_from: Lower bound of the range.
    :param date_to: Upper bound of the range.
    :param threshold: Passed through unchanged to the runner.
    :param organizations: Passed through unchanged to the runner.
    """
    def _as_text(value):
        # Values without an isoformat() method fall back to plain str().
        try:
            return value.isoformat()
        except AttributeError:
            return str(value)

    settings = load_config()
    # The returned client is not used below; the call itself is kept as-is
    # (presumably for its setup side effects -- confirm).
    _es_client = setup_elasticsearch(settings)

    start_text = _as_text(date_from)
    end_text = _as_text(date_to)

    runner = OpenbesluitvormingCountsScraperRunner()
    runner.run(
        config=settings,
        date_from=start_text,
        date_to=end_text,
        threshold=threshold,
        organizations=organizations
    )
def es_put_template(template_dir):
    """
    Put all templates found in a directory into Elasticsearch.

    A template contains settings and mappings that should be applied to
    multiple indices. Every file in ``template_dir`` matching
    ``es-*.json`` is loaded and stored as a template named after the file,
    without the ``es-`` prefix and the ``.json`` extension. If the index
    ``jodal_<template name>`` does not exist yet, it is created as well.

    :param template_dir: Path to the directory containing the
        ``es-*.json`` template files.
    """
    config = load_config()
    es = setup_elasticsearch(config)

    click.echo('Putting ES template from dir: %s' % template_dir)

    prefix = 'es-'
    suffix = '.json'
    for template_path in glob.glob(os.path.join(template_dir, 'es-*.json')):
        click.echo(template_path)
        with open(template_path, 'rb') as template_file:
            template = json.load(template_file)

        # Strip only the leading prefix and the trailing extension of the
        # file name. Replacing every occurrence (and doing so on the whole
        # path) would mangle names such as ``es-themes-x.json``, where
        # ``es-`` also appears inside the name. The glob pattern guarantees
        # that both prefix and suffix are present.
        file_name = os.path.basename(template_path)
        template_name = file_name[len(prefix):-len(suffix)]
        es.indices.put_template(template_name, template)

        index_name = 'jodal_%s' % (template_name)
        if not es.indices.exists(index=index_name):
            click.echo("Should make index %s" % (index_name,))
            es.indices.create(index=index_name)
def scrapers_locations():
    """Run the locations scraper.

    The configuration is loaded and Elasticsearch is set up first; the
    runner itself is invoked without arguments.
    """
    settings = load_config()
    # The returned client is not used below; the call itself is kept as-is
    # (presumably for its setup side effects -- confirm).
    _es_client = setup_elasticsearch(settings)

    runner = LocationsScraperRunner()
    runner.run()
def _init_es(self):
    """Create the Elasticsearch client on first use.

    Does nothing when ``self.es`` is already set; otherwise logs the setup
    and stores the result of ``setup_elasticsearch()`` on ``self.es``.
    """
    if self.es is not None:
        return

    logging.info('Elasticsearch: setting up')
    self.es = setup_elasticsearch()
def fetch(self):
    """Fetch the configured document from Elasticsearch.

    A client is set up from ``self.config`` and stored on ``self.es``;
    ``self.document_id`` is then looked up in the ``jodal_documents`` index.

    :returns: A one-element list holding the ``get`` response.
    """
    self.es = setup_elasticsearch(self.config)
    return [
        self.es.get(index='jodal_documents', id=self.document_id),
    ]