Beispiel #1
1
def reindex(src, dest):
    ''' Copies every document from index {src} into index {dest}, then
    deletes {src}.

    :param src: name of the index to read from; it is deleted afterwards
    :param dest: name of the index that receives the documents
    '''
    from elasticsearch import helpers as es_helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    client = manager.es

    es_helpers.reindex(client, src, dest)
    # Only reached when the reindex call returned without raising.
    client.indices.delete(src)
Beispiel #2
0
def alias(alias, index):
    ''' Creates an Elasticsearch index alias

    Any existing alias called {alias} is removed first, then the alias is
    created on {index}. The two steps are separate requests, so the alias
    briefly does not exist in between.

    :param alias: name of the alias to (re)create
    :param index: name of the index the alias should point to
    '''
    from scrapi.processing.elasticsearch import DatabaseManager

    dm = DatabaseManager()
    dm.setup()
    indices = dm.es.indices

    # delete_alias takes the indices to look in as `index` and the alias
    # name(s) as `name`. The previous call passed them swapped
    # (index=alias, name='_all'), which removed *every* alias from the
    # indices behind this alias. Remove only this alias, wherever it lives;
    # a 404 just means it does not exist yet.
    indices.delete_alias(index='_all', name=alias, ignore=404)
    # Keyword arguments keep this correct regardless of the positional
    # order used by the installed client version.
    indices.put_alias(name=alias, index=index)
Beispiel #3
0
def provider_map(delete=False):
    ''' Adds favicons and metadata for harvesters to Elasticsearch

    For every harvester in the scrapi registry, reads
    img/favicons/<short_name>_favicon.ico, encodes it as a data URL and
    indexes it together with the harvester's names and url into the
    'share_providers' index. Prints the resulting document count.

    :param delete: when true, drop the 'share_providers' index first
        (a missing index is ignored)
    '''
    import base64

    from six.moves.urllib import parse as urllib_parse
    from scrapi import registry
    from scrapi.processing.elasticsearch import DatabaseManager

    # base64.encodestring was removed in Python 3.9. encodebytes is the
    # same function under its Python 3 name (identical output); Python 2
    # only provides encodestring, so fall back to it there.
    encode_base64 = getattr(base64, 'encodebytes', None) or base64.encodestring

    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    if delete:
        es.indices.delete(index='share_providers', ignore=[404])

    for harvester_name, harvester in registry.items():
        with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
            favicon = urllib_parse.quote(encode_base64(f.read()))

        # NOTE(review): the data URL is labelled image/png although the file
        # read above is an .ico -- confirm whether consumers rely on this.
        es.index(
            'share_providers',
            harvester.short_name,
            body={
                'favicon': 'data:image/png;base64,' + favicon,
                'short_name': harvester.short_name,
                'long_name': harvester.long_name,
                'url': harvester.url
            },
            id=harvester.short_name,
            refresh=True
        )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
Beispiel #4
0
def gen_harvester(shortname=None, longname=None, url=None, favicon_dataurl=None, **kwargs):
    ''' Registers a provider in Elasticsearch and builds its harvester class

    Indexes a document describing the provider into 'share_providers' and
    returns a new PushApiHarvester subclass named '<Shortname>Harvester'
    carrying short_name, long_name and url as class attributes.

    All four named arguments must be truthy (enforced with an assert);
    extra keyword arguments are accepted and ignored.
    '''
    assert shortname and longname and url and favicon_dataurl
    logger.info('Generating harvester {}'.format(shortname))

    from scrapi.processing.elasticsearch import DatabaseManager
    manager = DatabaseManager()
    manager.setup()

    provider_doc = {
        'favicon': favicon_dataurl,
        'short_name': shortname,
        'long_name': longname,
        'url': url
    }
    # The short name doubles as both the document type and the document id.
    manager.es.index('share_providers', shortname, body=provider_doc, id=shortname, refresh=True)

    class_name = '{}Harvester'.format(shortname.lower().capitalize())
    class_attrs = {'short_name': shortname, 'long_name': longname, 'url': url}
    return type(class_name, (PushApiHarvester, ), class_attrs)
Beispiel #5
0
def alias(alias, index):
    ''' Creates an Elasticsearch index alias

    Drops any existing alias named {alias} and then adds it to {index}.
    These are two separate requests, so there is a short window in which
    the alias is absent.

    :param alias: alias name to (re)create
    :param index: index the alias should refer to
    '''
    from scrapi.processing.elasticsearch import DatabaseManager

    dm = DatabaseManager()
    dm.setup()
    es_indices = dm.es.indices

    # In delete_alias, `index` selects the indices to search and `name` the
    # alias(es) to remove. The original call had the two swapped
    # (index=alias, name='_all'), which stripped all aliases from whatever
    # indices this alias resolved to. Target just this alias on any index
    # and tolerate a 404 when it is not defined yet.
    es_indices.delete_alias(index='_all', name=alias, ignore=404)
    # Explicit keywords avoid depending on the client's positional order.
    es_indices.put_alias(name=alias, index=index)
Beispiel #6
0
def provider_map(delete=False):
    ''' Adds favicons and metadata for harvesters to Elasticsearch

    Generates the push-API harvesters, then for every harvester in the
    scrapi registry that has no document in 'share_providers' yet, reads
    img/favicons/<short_name>_favicon.ico, encodes it as a data URL and
    indexes it with the harvester's names and url. Prints the resulting
    document count.

    :param delete: when true, drop the 'share_providers' index first
        (a missing index is ignored)
    '''
    import base64

    from six.moves.urllib import parse as urllib_parse
    from scrapi import registry
    from scrapi.base.helpers import null_on_error
    from scrapi.processing.elasticsearch import DatabaseManager

    # base64.encodestring was removed in Python 3.9. encodebytes is the
    # same function under its Python 3 name (identical output); Python 2
    # only provides encodestring, so fall back to it there.
    encode_base64 = getattr(base64, 'encodebytes', None) or base64.encodestring

    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    if delete:
        es.indices.delete(index='share_providers', ignore=[404])
    from scrapi.harvesters.push_api import gen_harvesters
    gen_harvesters()

    for harvester_name, harvester in registry.items():
        # Index only when the lookup comes back falsy; presumably
        # null_on_error turns a failed es.get (e.g. document missing) into
        # None -- verify against scrapi.base.helpers.
        if not null_on_error(es.get, log=False)(index='share_providers', doc_type=harvester_name, id=harvester_name):
            with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
                favicon = urllib_parse.quote(encode_base64(f.read()))

            # NOTE(review): the data URL is labelled image/png although the
            # file read above is an .ico -- confirm whether consumers rely
            # on this.
            es.index(
                'share_providers',
                harvester.short_name,
                body={
                    'favicon': 'data:image/png;base64,' + favicon,
                    'short_name': harvester.short_name,
                    'long_name': harvester.long_name,
                    'url': harvester.url
                },
                id=harvester.short_name,
                refresh=True
            )
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
Beispiel #7
0
def gen_harvester(shortname=None, longname=None, url=None,
                  favicon_dataurl=None, **kwargs):
    ''' Indexes a provider document and returns a matching harvester class

    Writes the provider's favicon, names and url into the
    'share_providers' index, then creates a PushApiHarvester subclass
    called '<Shortname>Harvester' with short_name, long_name and url set
    as class attributes.

    shortname, longname, url and favicon_dataurl must all be truthy
    (checked with an assert); any further keyword arguments are ignored.
    '''
    assert shortname and longname and url and favicon_dataurl
    logger.info('Generating harvester {}'.format(shortname))

    from scrapi.processing.elasticsearch import DatabaseManager
    db_manager = DatabaseManager()
    db_manager.setup()
    client = db_manager.es

    document = {
        'favicon': favicon_dataurl,
        'short_name': shortname,
        'long_name': longname,
        'url': url
    }
    # shortname is used as the document type as well as the document id.
    client.index(
        'share_providers',
        shortname,
        body=document,
        id=shortname,
        refresh=True
    )

    harvester_name = '{}Harvester'.format(shortname.lower().capitalize())
    attributes = dict(short_name=shortname, long_name=longname, url=url)
    return type(harvester_name, (PushApiHarvester, ), attributes)
Beispiel #8
0
def reindex(src, dest):
    ''' Moves all documents from index {src} to index {dest}

    The documents are copied with the elasticsearch reindex helper and
    {src} is deleted once that call has returned.
    '''
    from elasticsearch import helpers as es_helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    client = manager.es

    es_helpers.reindex(client, src, dest)
    client.indices.delete(src)
Beispiel #9
0
def reindex(src, dest):
    ''' Copies the contents of index {src} into index {dest} and then
    removes {src}

    :param src: source index name (deleted after the copy)
    :param dest: destination index name
    '''
    from scrapi.processing.elasticsearch import DatabaseManager
    from elasticsearch import helpers as es_helpers

    db = DatabaseManager()
    db.setup()
    es = db.es

    es_helpers.reindex(es, src, dest)
    # Runs only if the reindex above did not raise.
    es.indices.delete(src)