Beispiel #1
0
def scrapers_openspendingdoc(document_id):
    """
    Run the OpenSpending document scraper for a single document.

    Loads the configuration, sets up Elasticsearch with it and then invokes
    ``OpenSpendingDocumentScraperRunner().run`` with the configuration and the
    given document id as keyword arguments.

    :param document_id: Identifier passed through unchanged to the runner.
    """
    config = load_config()
    # The client returned by the setup is not used in this function; the call
    # itself is kept in case it has side effects (TODO confirm).
    setup_elasticsearch(config)
    runner = OpenSpendingDocumentScraperRunner()
    runner.run(config=config, document_id=document_id)
Beispiel #2
0
def scrapers_openspending(date_from, date_to):
    """
    Run the OpenSpending scraper for a date range.

    Both bounds are converted to strings before being handed to the runner:
    ``isoformat()`` is used when the value provides it, otherwise ``str()``.

    :param date_from: Start of the range; a date-like object or any value
        that can be converted with ``str()``.
    :param date_to: End of the range; same conventions as ``date_from``.
    """
    config = load_config()
    # The returned client is not used here; the call is kept in case the
    # setup has side effects (TODO confirm).
    setup_elasticsearch(config)

    date_range = {}
    for key, value in (('date_from', date_from), ('date_to', date_to)):
        try:
            date_range[key] = value.isoformat()
        except AttributeError:
            # Not a date-like object (e.g. already a string).
            date_range[key] = str(value)

    OpenSpendingScraperRunner().run(config=config, **date_range)
Beispiel #3
0
def scrapers_poliflw(date_from, date_to, scroll):
    """
    Run the Poliflw scraper for a date range.

    The date bounds are turned into strings (``isoformat()`` when available,
    ``str()`` otherwise) and passed to ``PoliflwScraperRunner().run`` together
    with the configuration and the ``scroll`` value.

    :param date_from: Start of the range; date-like or ``str()``-convertible.
    :param date_to: End of the range; date-like or ``str()``-convertible.
    :param scroll: Passed through unchanged to the runner.
    """
    def _as_text(value):
        # Prefer ISO formatting; fall back to str() for values without it.
        try:
            return value.isoformat()
        except AttributeError:
            return str(value)

    config = load_config()
    # The returned client is not used here; the call is kept in case the
    # setup has side effects (TODO confirm).
    setup_elasticsearch(config)

    # Convert both bounds before the runner is constructed.
    start = _as_text(date_from)
    end = _as_text(date_to)

    PoliflwScraperRunner().run(
        config=config,
        date_from=start,
        date_to=end,
        scroll=scroll,
    )
Beispiel #4
0
 def run(self):
     """
     Run every configured scraper, aggregate the items and bulk-index them.

     Each entry of ``self.scrapers`` is instantiated without arguments, given
     an empty ``items`` list and run; the items it leaves behind are
     collected. Any exception raised by a scraper is logged and re-raised.
     The collected items are passed to ``self.aggregate``; every resulting
     location gets ``_id`` (copied from its ``id``) and ``_index`` set before
     the whole batch is handed to ``bulk``.
     """
     collected = []
     for scraper_cls in self.scrapers:
         scraper = scraper_cls()
         try:
             scraper.items = []
             scraper.run()
             collected.extend(scraper.items)
         except Exception as exc:
             logging.error(exc)
             raise exc
     logging.info('Fetching resulted in %s items ...', len(collected))

     locations = self.aggregate(collected)
     es = setup_elasticsearch()
     for location in locations:
         location['_id'] = location['id']
         location['_index'] = 'jodal_locations'
     # Third positional argument is presumably ``stats_only`` of
     # elasticsearch.helpers.bulk -- TODO confirm against the import.
     bulk(es, locations, False)
Beispiel #5
0
def scrapers_obv_counts(date_from, date_to, threshold, organizations):
    """
    Run the Openbesluitvorming counts scraper for a date range.

    The date bounds are converted to strings (``isoformat()`` when available,
    ``str()`` otherwise); ``threshold`` and ``organizations`` are forwarded
    to ``OpenbesluitvormingCountsScraperRunner().run`` unchanged.

    :param date_from: Start of the range; date-like or ``str()``-convertible.
    :param date_to: End of the range; date-like or ``str()``-convertible.
    :param threshold: Passed through unchanged to the runner.
    :param organizations: Passed through unchanged to the runner.
    """
    def _stringify(moment):
        # ISO format for date-like values, plain str() for everything else.
        try:
            return moment.isoformat()
        except AttributeError:
            return str(moment)

    config = load_config()
    # The returned client is not used here; the call is kept in case the
    # setup has side effects (TODO confirm).
    setup_elasticsearch(config)

    run_args = {'config': config}
    run_args.update({
        name: _stringify(value)
        for name, value in (('date_from', date_from), ('date_to', date_to))
    })
    run_args['threshold'] = threshold
    run_args['organizations'] = organizations

    OpenbesluitvormingCountsScraperRunner().run(**run_args)
Beispiel #6
0
def es_put_template(template_dir):
    """
    Put all templates found in a directory into Elasticsearch.

    A template contains settings and mappings that should be applied to
    multiple indices. Every file in ``template_dir`` matching ``es-*.json``
    is loaded as JSON and put as a template whose name is the file name
    without the leading ``es-`` and the trailing ``.json`` (so
    ``es-foo.json`` becomes ``foo``). If the index ``jodal_<name>`` does not
    exist yet, it is created.

    :param template_dir: Path to the directory containing the ``es-*.json``
        template files.
    """
    prefix = 'es-'
    suffix = '.json'

    config = load_config()
    es = setup_elasticsearch(config)

    click.echo('Putting ES template from dir: %s' % (template_dir,))

    for template_path in glob.glob(os.path.join(template_dir, 'es-*.json')):
        click.echo(template_path)
        with open(template_path, 'rb') as template_file:
            template = json.load(template_file)

        # Strip the prefix/suffix from the base name only, and only at its
        # ends. Using str.replace() on the full path would also remove inner
        # occurrences (e.g. 'es-sources-list.json' -> 'sourclist').
        # The glob pattern guarantees both the prefix and the suffix exist.
        file_name = os.path.basename(template_path)
        template_name = file_name[len(prefix):len(file_name) - len(suffix)]

        es.indices.put_template(template_name, template)
        index_name = 'jodal_%s' % (template_name,)
        if not es.indices.exists(index=index_name):
            click.echo("Should make index %s" % (index_name,))
            es.indices.create(index=index_name)
Beispiel #7
0
def scrapers_locations():
    """
    Run the locations scraper.

    Loads the configuration and sets up Elasticsearch with it, then invokes
    ``LocationsScraperRunner().run()`` without arguments.
    """
    # Neither the config nor the client is used afterwards; both calls are
    # kept in case they have side effects (TODO confirm).
    setup_elasticsearch(load_config())
    LocationsScraperRunner().run()
Beispiel #8
0
 def _init_es(self):
     """
     Set up ``self.es`` on first use.

     Does nothing when ``self.es`` is already set; otherwise logs a message
     and stores the result of ``setup_elasticsearch()`` in ``self.es``.
     """
     if self.es is not None:
         return
     logging.info('Elasticsearch: setting up')
     self.es = setup_elasticsearch()
Beispiel #9
0
 def fetch(self):
     """
     Fetch the document identified by ``self.document_id``.

     Sets up ``self.es`` from ``self.config`` and gets the document from the
     ``jodal_documents`` index.

     :return: A one-element list holding the result of the ``get`` call.
     """
     self.es = setup_elasticsearch(self.config)
     lookup = {'index': 'jodal_documents', 'id': self.document_id}
     return [self.es.get(**lookup)]