def provider_map(delete=False):
    """Add favicons and metadata for every registered harvester to Elasticsearch.

    Indexes one document per harvester into the ``share_providers`` index,
    keyed by the harvester's ``short_name``, then prints the resulting
    document count.

    :param delete: when True, drop the ``share_providers`` index first
        (404 is ignored so a missing index is not an error).
    """
    # All dependencies are imported locally to keep this maintenance task
    # self-contained; `base64` was previously used without being imported.
    import base64
    from six.moves.urllib import parse as urllib_parse

    from scrapi import registry
    from scrapi.processing.elasticsearch import DatabaseManager

    dm = DatabaseManager()
    dm.setup()
    es = dm.es

    if delete:
        es.indices.delete(index='share_providers', ignore=[404])

    for harvester_name, harvester in registry.items():
        with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
            # b64encode replaces the deprecated encodestring (removed in
            # Python 3.9); unlike encodestring it emits no newlines, which
            # previously leaked into the data URI as percent-encoded %0A.
            favicon = urllib_parse.quote(base64.b64encode(f.read()))

        es.index(
            'share_providers',
            harvester.short_name,
            body={
                'favicon': 'data:image/png;base64,' + favicon,
                'short_name': harvester.short_name,
                'long_name': harvester.long_name,
                'url': harvester.url
            },
            id=harvester.short_name,
            refresh=True  # make the doc immediately visible to the count below
        )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
def provider_map(delete=False):
    """Add favicons and metadata for every registered harvester to Elasticsearch.

    Regenerates push-API harvesters, then indexes one document per harvester
    into the ``share_providers`` index — skipping harvesters that already
    have a document — and prints the resulting document count.

    :param delete: when True, drop the ``share_providers`` index first
        (404 is ignored so a missing index is not an error).
    """
    # Dependencies are imported locally to keep this maintenance task
    # self-contained; `base64` was previously used without being imported.
    import base64
    from six.moves.urllib import parse as urllib_parse

    from scrapi import registry
    from scrapi.base.helpers import null_on_error
    from scrapi.processing.elasticsearch import DatabaseManager

    dm = DatabaseManager()
    dm.setup()
    es = dm.es

    if delete:
        es.indices.delete(index='share_providers', ignore=[404])

    # Ensure push-API harvesters exist in the registry before iterating it.
    from scrapi.harvesters.push_api import gen_harvesters
    gen_harvesters()

    for harvester_name, harvester in registry.items():
        # Only index providers that are not already present; null_on_error
        # turns the "document missing" exception from es.get into a falsy
        # result instead of raising.
        if not null_on_error(es.get, log=False)(index='share_providers', doc_type=harvester_name, id=harvester_name):
            with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
                # b64encode replaces the deprecated encodestring (removed in
                # Python 3.9); unlike encodestring it emits no newlines, which
                # previously leaked into the data URI as percent-encoded %0A.
                favicon = urllib_parse.quote(base64.b64encode(f.read()))

            es.index(
                'share_providers',
                harvester.short_name,
                body={
                    'favicon': 'data:image/png;base64,' + favicon,
                    'short_name': harvester.short_name,
                    'long_name': harvester.long_name,
                    'url': harvester.url
                },
                id=harvester.short_name,
                refresh=True  # make the doc immediately visible to the count below
            )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
def provider_map():
    """Add favicons and metadata for every registered harvester to Elasticsearch.

    Indexes one document per harvester into the ``share_providers`` index,
    keyed by the harvester's ``short_name``, then prints the resulting
    document count.
    """
    # Fixes in this revision: `urllib.quote` and `bytes.encode('base64')`
    # are Python-2-only (and `urllib`/`registry` were never imported here);
    # the favicon file handle was never closed.
    import base64
    from six.moves.urllib import parse as urllib_parse

    from scrapi import registry
    from scrapi.processing.elasticsearch import es

    for harvester_name, harvester in registry.items():
        with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
            favicon = urllib_parse.quote(base64.b64encode(f.read()))

        es.index(
            'share_providers',
            harvester.short_name,
            body={
                'favicon': 'data:image/png;base64,' + favicon,
                'short_name': harvester.short_name,
                'long_name': harvester.long_name,
                'url': harvester.url
            },
            id=harvester.short_name,
            refresh=True  # make the doc immediately visible to the count below
        )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
def provider_map(delete=False):
    """Add favicons and metadata for every registered harvester to Elasticsearch.

    Indexes one document per harvester into the ``share_providers`` index,
    keyed by the harvester's ``short_name``, then prints the resulting
    document count.

    :param delete: when True, drop the ``share_providers`` index first
        (404 is ignored so a missing index is not an error).
    """
    # Fixes in this revision: `urllib.quote` and `bytes.encode('base64')`
    # are Python-2-only (and `urllib`/`registry` were never imported here);
    # the favicon file handle was never closed.
    import base64
    from six.moves.urllib import parse as urllib_parse

    from scrapi import registry
    from scrapi.processing.elasticsearch import es

    if delete:
        es.indices.delete(index='share_providers', ignore=[404])

    for harvester_name, harvester in registry.items():
        with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
            favicon = urllib_parse.quote(base64.b64encode(f.read()))

        es.index(
            'share_providers',
            harvester.short_name,
            body={
                'favicon': 'data:image/png;base64,' + favicon,
                'short_name': harvester.short_name,
                'long_name': harvester.long_name,
                'url': harvester.url
            },
            id=harvester.short_name,
            refresh=True  # make the doc immediately visible to the count below
        )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])