Example 1
    def __init__(self, config_file='config.cfg'):
        super(Elastic, self).__init__()

        self.percentage = 10.0
        self.minimum_occurrences = 250

        # The ConfigParser documentation points out that there is no way to
        # force default values for options outside the "DEFAULT" section.
        config = ConfigParser()
        config.read(config_file)
        if not config.has_section('elastic'):
            config.add_section('elastic')
        
        for option, value in {'use_ssl': 'True', 'host': '127.0.0.1', 'version': '2', 'index': 'nxapi', 'doc_type': 'events'}.items():
            if not config.has_option('elastic', option):
                config.set('elastic', option, value)

        self.version = config.getint('elastic', 'version')
        self.index = config.get('elastic', 'index')
        use_ssl = config.getboolean('elastic', 'use_ssl')
        host = config.get('elastic', 'host')
        self.doc_type = config.get('elastic', 'doc_type')
        self.client = connections.create_connection(
            hosts=[host], use_ssl=use_ssl, index=self.index, version=self.version,
            doc_type=self.doc_type, timeout=30, retry_on_timeout=True)

        Event.init(index=self.index)
        index = Index(self.index, using=self.client)
        index.doc_type(Event)
        self.initialize_search()
Example 2
def test_registered_doc_type_included_in_search():
    i = Index('i', using='alias')
    i.document(Post)

    s = i.search()

    assert s._doc_type == [Post]
Example 3
def test_registered_doc_type_included_in_search():
    i = Index('i', using='alias')
    i.doc_type(Post)

    s = i.search()

    assert s._doc_type_map == {'post': Post}
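
Examples 2 and 3 are the same test written against two generations of the library API: Index.document() (elasticsearch-dsl 6.x and later) versus the older Index.doc_type() (5.x line). A minimal sketch of the modern registration, assuming a trivial Post document class:

from elasticsearch_dsl import Document, Index, Text

class Post(Document):
    title = Text()

i = Index('i')
i.document(Post)  # 6.x+ API; the 5.x line used i.doc_type(Post)
s = i.search()    # the search is bound to the index and its registered documents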
Example 4
def es_delete_cmd(index_name):
    """Delete a specified index

    :arg index_name: name of index to delete

    """
    indexes = [name for name, count in get_indexes()]

    if index_name not in indexes:
        log.error('Index "%s" is not a valid index.', index_name)
        if not indexes:
            log.error('There are no valid indexes.')
        else:
            log.error('Valid indexes: %s', ', '.join(indexes))
        return

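    # raw_input is Python 2; Python 3 renamed it to input()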
    ret = raw_input(
        'Are you sure you want to delete "%s"? (yes/no) ' % index_name
    )
    if ret != 'yes':
        return

    log.info('Deleting index "%s"...', index_name)
    index = Index(name=index_name, using='default')
    try:
        index.delete()
    except NotFoundError:
        pass
    log.info('Done!')
Example 5
    def create_index_if_does_not_exist(cls):
        index = Index(cls.INDEX_NAME)
        index.doc_type(cls)

        if not index.connection.indices.exists(cls.INDEX_NAME):
            index.create()
            time.sleep(1)  # It takes some time to create the index
Example 6
def test_analyzers_returned_from_to_dict():
    random_analyzer_name = ''.join((choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name, tokenizer="standard", filter="standard")
    index = Index('i', using='alias')
    index.analyzer(random_analyzer)

    assert index.to_dict()["settings"]["analysis"]["analyzer"][random_analyzer_name] == {
        "filter": ["standard"], "type": "custom", "tokenizer": "standard"
    }
Example 7
def test_index_template_can_have_order():
    i = Index('i-*')
    it = i.as_template('i', order=2)

    assert {
        "index_patterns": ["i-*"],
        "order": 2
    } == it.to_dict()
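
The IndexTemplate returned by as_template() is only a local object; persisting it requires a save call. A hedged usage sketch, assuming a registered 'default' connection:

it = Index('i-*').as_template('i', order=2)
it.save(using='default')  # writes the template to the cluster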
Example 8
def test_aliases_returned_from_to_dict():
    random_alias = ''.join((choice(string.ascii_letters) for _ in range(100)))
    alias_dict = {random_alias: {}}

    index = Index('i', using='alias')
    index.aliases(**alias_dict)

    assert index._aliases == index.to_dict()['aliases'] == alias_dict
Example 9
def get_index(name, doc_types, *, using, shards=1, replicas=0, interval="1s"):
    index = Index(name, using=using)
    for doc_type in doc_types:
        index.doc_type(doc_type)
    index.settings(
        number_of_shards=shards, number_of_replicas=replicas, refresh_interval=interval
    )
    return index
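
A usage sketch for the factory above; the Post document class and the 'default' connection alias are assumptions for illustration:

idx = get_index('posts-v1', [Post], using='default', shards=2, replicas=1)
idx.create()  # creates the index with the registered doc types and settings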
Example 10
def test_aliases_add_to_object():
    random_alias = ''.join((choice(string.ascii_letters) for _ in range(100)))
    alias_dict = {random_alias: {}}

    index = Index('i', using='alias')
    index.aliases(**alias_dict)

    assert index._aliases == alias_dict
Example 11
def test_index_can_be_saved_even_with_settings(write_client):
    i = Index('test-blog', using=write_client)
    i.settings(number_of_shards=3, number_of_replicas=0)
    i.save()
    i.settings(number_of_replicas=1)
    i.save()

    assert '1' == i.get_settings()['test-blog']['settings']['index']['number_of_replicas']
Example 12
    def initialize_index(self, delete_if_exists=False):
        """
        Initialize index with mapping in ElasticSearch

        :param delete_if_exists: delete index, if exists
        :return: None
        """

        def update_index_settings():
            """
            Update index settings for Slovenian lemmatization of words.
            As far as we know, the elasticsearch-dsl library does not support
            custom filter settings.

            :return: None
            """
            analysis_settings = {
                "analysis": {
                    "filter": {
                        "lemmagen_filter_sl": {
                            "type": "lemmagen",
                            "lexicon": "sl"
                        }
                    },
                    "analyzer": {
                        "lemmagen_sl": {
                            "type": "custom",
                            "tokenizer": "uax_url_email",
                            "filter": [
                                "lemmagen_filter_sl",
                                "lowercase"
                            ]
                        }
                    }
                }
            }
            self.client.cluster.health(index=self.index_name,
                                       wait_for_status='green',
                                       request_timeout=2)
            self.client.indices.close(index=self.index_name)
            self.client.indices.put_settings(json.dumps(analysis_settings),
                                             index=self.index_name)
            self.client.indices.open(index=self.index_name)

        index = Index(self.index_name, using=self.client)
        if delete_if_exists and index.exists():
            index.delete()

        index.settings(
            # use higher number in production
            number_of_replicas=0
        )

        # register models
        index.doc_type(Document)
        index.create()
        update_index_settings()  # set lemmatizer
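
For what it's worth, newer elasticsearch-dsl releases do expose custom token filters through token_filter(), which can replace the close/put_settings/open dance above. A hedged sketch, assuming the same lemmagen plugin is installed on the cluster; the index name is hypothetical:

from elasticsearch_dsl import Index, analyzer, token_filter

lemmagen_filter_sl = token_filter('lemmagen_filter_sl', type='lemmagen', lexicon='sl')
lemmagen_sl = analyzer('lemmagen_sl',
                       tokenizer='uax_url_email',
                       filter=[lemmagen_filter_sl, 'lowercase'])

index = Index('documents')   # hypothetical index name
index.analyzer(lemmagen_sl)  # analysis settings are applied at create() time
index.create()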
Example 13
    def run(self, *args, **options):
        self.confirm(
            u"Are you really sure you want to delete the index '{0}' ?"
            .format(self.index_name)
        )
        index = Index(self.index_name)
        if not self.dry_run:
            index.delete()
        self.print_success(u"Index {0} deleted.".format(self.index_name))
Example 14
def drop_index(silent=True):
    """Remove the ElasticSearch index.
    """
    index = Index(elasticsearch_config['index'])
    try:
        index.delete()
    except Exception as exc:
        if not silent:
            raise exc
Example 15
    def recreate_index(self):
        """ Delete and then create a given index and set a default mapping.

        :param index: [string] name of the index. If None a default is used
        """
        submission = Index(self.index)
        submission.delete(ignore=404)

        ESSubmission.init()
Example 16
def test_delete(write_client):
    write_client.indices.create(
        index='test-index',
        body={'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    )

    i = Index('test-index', using=write_client)
    i.delete()
    assert not write_client.indices.exists(index='test-index')
Example 17
class BaseSearchTestCase(TestCase):

    def setUp(self):
        from django.conf import settings
        SEARCH = getattr(settings, 'SEARCH')

        connections.create_connection('testing', **SEARCH['default']['connections'])
        self.index = Index(SEARCH['default']['index'], using='testing')
        # This is needed for test_documents, but has side effects in all running tests
        doctypes_list = (
            value for name, value
            in inspect.getmembers(documents)
            if not name.startswith('_') and
            inspect.isclass(value) and
            issubclass(value, DocType) and
            name != DocType.__name__
        )

        for doctype in doctypes_list:
            # Remove assigned index
            doctype._doc_type.index = None
            # Associate docs with test index
            self.index.doc_type(doctype)

        if self.index.exists():
            self.index.delete(ignore=404)
        self.index.create()

        self.search = Search(index=SEARCH['default']['index'])

    def tearDown(self):
        self.index.delete()
        queue = django_rq.get_queue()
        queue.empty()
Example 18
def test_cloned_index_has_copied_settings_and_using():
    client = object()
    i = Index('my-index', using=client)
    i.settings(number_of_shards=1)

    i2 = i.clone('my-other-index')

    assert 'my-other-index' == i2._name
    assert client is i2._using
    assert i._settings == i2._settings
    assert i._settings is not i2._settings
Example 19
    def test_create_index_manually(self):
        out = io.StringIO()
        index_name = 'test_manually_created_index'
        call_command('create_index', index_name, stdout=out)
        self.assertIn("Created search index '{}'".format(index_name), out.getvalue())

        index = Index(index_name)
        self.assertTrue(index.exists())

        index.delete()
        self.assertFalse(index.exists())
Example 20
def test_registered_doc_type_included_in_to_dict():
    i = Index('i', using='alias')
    i.document(Post)

    assert {
        'mappings': {
            'properties': {
                'title': {'type': 'text'},
                'published_from': {'type': 'date'},
            }
        }
    } == i.to_dict()
Example 21
    def test_create_index_usings_settings(self):
        out = io.StringIO()
        call_command('create_index', stdout=out)

        self.assertIn("Creating search indices from settings", out.getvalue())
        self.assertIn("Created search index '{}'".format(self.settings['default']['index']), out.getvalue())

        index = Index(self.settings['default']['index'])
        self.assertTrue(index.exists())

        index.delete()
        self.assertFalse(index.exists())
Example 22
def test_conflicting_mapping_raises_error_in_index_to_dict():
    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    i = Index('i')
    i.document(A)
    i.document(B)

    with raises(ValueError):
        i.to_dict()
Example 23
    def applyConfig(self):
        try:
            print("Connecting to '%s', index '%s'" % (self.confESHost, self.confESIndex))
            res = connections.create_connection(hosts=[self.confESHost])
            idx = Index(self.confESIndex)
            idx.doc_type(DocHTTPRequestResponse)
            DocHTTPRequestResponse.init()
            try:
                idx.create()
            except Exception:
                pass  # index may already exist
        except Exception as e:
            JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e)), "Error", JOptionPane.ERROR_MESSAGE)
Example 24
def create_index(hosts, index):
    i = set_hosts_index(hosts=hosts, index=index)
    logprint('debug', 'creating new index')
    i = Index(index)
    i.create()
    logprint('debug', 'registering doc types')
    i.doc_type(Author)
    i.doc_type(Page)
    i.doc_type(Source)
    logprint('debug', 'DONE')
Example 25
    def test_es_create_documents(self):
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command('es_create_documents')

        # index_name not in settings.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_documents',
                index_name='barfoo'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_documents',
                index_name='foobar'
            )

        index = Index('foobar')
        doc_type = Token.get_es_doc_type()
        index.doc_type(doc_type)
        index.create()
        self.refresh()

        # Disable auto indexing while creating objects.
        settings.TRAMPOLINE['OPTIONS']['disabled'] = True
        token = Token.objects.create(name="token")
        token_not_indexable = Token.objects.create(name='not_indexable')
        token_raise_exception = Token.objects.create(name='raise_exception')
        settings.TRAMPOLINE['OPTIONS']['disabled'] = False

        # Dry run.
        call_command(
            'es_create_documents',
            index_name='foobar',
            dry_run=True
        )
        self.assertDocDoesntExist(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)

        call_command(
            'es_create_documents',
            index_name='foobar',
            verbosity=3
        )
        self.assertDocExists(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)
Example 26
    def test_es_create_alias(self):
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                target_name='foobar_target'
            )

        # Target name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar',
                target_name='foobar_target'
            )

        index = Index('foobar_target')
        index.create()
        self.refresh()

        # Alias with same name as index.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar_target',
                target_name='foobar_target'
            )

        # Dry run.
        call_command(
            'es_create_alias',
            index_name='foobar',
            target_name='foobar_target',
            dry_run=True
        )
        self.assertAliasDoesntExist(index='foobar_target', name='foobar')

        call_command(
            'es_create_alias',
            index_name='foobar',
            target_name='foobar_target'
        )
        self.assertAliasExists(index='foobar_target', name='foobar')
Example 27
def test_registered_doc_type_included_in_to_dict():
    i = Index('i', using='alias')
    i.doc_type(Post)

    assert Post._doc_type.index == 'i'
    assert {
        'mappings': {
            'post': {
                'properties': {
                    'title': {'type': 'string'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    } == i.to_dict()
Example 28
    def registerExtenderCallbacks(self, callbacks):
        self.callbacks = callbacks
        self.helpers = callbacks.getHelpers()
        callbacks.setExtensionName("Storing HTTP Requests/Responses into ElasticSearch")
        self.callbacks.registerHttpListener(self)
        self.callbacks.registerContextMenuFactory(self)
        self.out = callbacks.getStdout()

        res = connections.create_connection(hosts=[ES_host])
        idx = Index(ES_index)
        idx.doc_type(DocHTTPRequestResponse)
        try:
            idx.create()
        except Exception:
            print("Index already exists")
Example 29
def test_doc_type_can_be_set():
    i = Index('i', doc_type='t')
    m = Mapping('t')
    m.field('title', Text())
    i.mapping(m)

    assert {
        'mappings': {
            't': {
                'properties': {
                    'title': {'type': 'text'}
                }
            }
        }
    } == i.to_dict()
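
Example 29 relies on mapping types, which Elasticsearch 7 removed. Under a 7.x client the equivalent typeless mapping would look roughly like this sketch:

from elasticsearch_dsl import Index, Mapping, Text

i = Index('i')
m = Mapping()             # typeless mapping (Elasticsearch 7+)
m.field('title', Text())
i.mapping(m)

assert i.to_dict() == {'mappings': {'properties': {'title': {'type': 'text'}}}}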
Example 30
    def test_es_delete_alias(self):
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                target_name='foobar_target'
            )

        # Target name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar',
                target_name='foobar_target',
                yes=True
            )

        index = Index('foobar_target')
        index.create()
        self.refresh()

        # Alias doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar',
                target_name='foobar_target',
                yes=True
            )

        trampoline_config.connection.indices.put_alias(
            index='foobar_target', name='foobar')
        self.assertAliasExists(index='foobar_target', name='foobar')

        call_command(
            'es_delete_alias',
            index_name='foobar',
            target_name='foobar_target',
            yes=True
        )
        self.assertAliasDoesntExist(index='foobar_target', name='foobar')
Example 31
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Calculate statistics aggregations."""
        # If no events have been indexed there is nothing to aggregate
        if not Index(self.event_index, using=self.client).exists():
            return
        lower_limit = start_date or self.get_bookmark()
        # Stop here if no bookmark could be estimated.
        if lower_limit is None:
            return
        upper_limit = min(
            end_date or datetime.datetime.max,  # ignore if `None`
            datetime.datetime.utcnow().replace(microsecond=0),
            datetime.datetime.combine(
                lower_limit + datetime.timedelta(self.batch_size),
                datetime.datetime.min.time())
        )
        while upper_limit <= datetime.datetime.utcnow():
            self.indices = set()
            self.new_bookmark = upper_limit.strftime(self.doc_id_suffix)
            bulk(self.client,
                 self.agg_iter(lower_limit, upper_limit),
                 stats_only=True,
                 chunk_size=50)
            # Flush all indices which have been modified
            current_search_client.indices.flush(
                index=','.join(self.indices),
                wait_if_ongoing=True
            )
            if update_bookmark:
                self.set_bookmark()
            self.indices = set()
            lower_limit = lower_limit + datetime.timedelta(self.batch_size)
            upper_limit = min(
                end_date or datetime.datetime.max,  # ignore if `None`
                datetime.datetime.utcnow().replace(microsecond=0),
                lower_limit + datetime.timedelta(self.batch_size)
            )
            if lower_limit > upper_limit:
                break
Example 32
def create_indexes(names, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
        settings (dict): Index settings

    """
    for name in names:
        index = Index(name)
        try:
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(e.__str__()))
Example 33
class ElasticSearch:
    __logger = logging
    __client = Elasticsearch()
    __index_name = "merchant_services"
    __doc_type = MerchantServicesDocument
    __index = Index(__index_name)

    def search(self, query):
        self.__logger.debug("in: {}".format(query))
        s = Search(using=self.__client,
                   index=self.__index_name,
                   doc_type=self.__doc_type)
        # q = Q('match', stem_terms=query)
        q = Q('match', terms=query)
        s = s.query(q)
        responses = s.execute()

        documents = []
        if responses.success():
            for hit in responses.hits:
                documents.append(
                    MerchantServices.objects.get(native_id=hit.native_id))

        self.__logger.debug("out: {}".format([
            'id: {}, name: {}'.format(doc.get_native_id(), doc.get_name())
            for doc in documents
        ]))
        return documents

    def bulk_indexing(self):
        self.__index.delete(ignore=404)
        self.__index.create()
        self.__client.indices.close(index=self.__index_name)
        self.__doc_type.init()
        self.__client.indices.open(index=self.__index_name)

        bulk(client=self.__client,
             actions=(b.indexing()
                      for b in MerchantServices.objects.all().iterator()))
Example 34
    def handle(self, *args, **options):
        from elasticsearch import Elasticsearch
        from elasticsearch_dsl import Index, Mapping
        ELASTICSEARCH_HOSTS = ['localhost']
        ELASTICSEARCH_INDEX = 'kindle2'

        es = Elasticsearch(ELASTICSEARCH_HOSTS)
        newindex = Index(ELASTICSEARCH_INDEX, using=es)
        if newindex.exists():
            exit('index already exists,change a new name')
        mp = Mapping()
        mp.field('title', 'text')
        mp.field('creator', 'text')
        mp.field('publisher', 'text')
        mp.field('iclass', 'text')
        mp.field('isbn', 'text')
        mp.field('asin', 'keyword')

        newindex.mapping(mp)
        newindex.create()
Example 35
    def construct_query(self, system, file_path=None):

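        # Note: .get_alias().keys()[0] only works on Python 2; Python 3 needs
        # list(...)[0], as in Example 58.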
        files_index_name = Index(
            settings.ES_INDEX_PREFIX.format('files')).get_alias().keys()[0]

        if system == settings.AGAVE_STORAGE_SYSTEM:
            storage_prefix_query = Q(
                {'prefix': {
                    'path._exact': '/' + self.username
                }})
        else:
            storage_prefix_query = Q({'prefix': {'path._exact': '/'}})

        ngram_query = Q("query_string",
                        query=self.query_string,
                        fields=["name"],
                        minimum_should_match='80%',
                        default_operator='or')

        match_query = Q("query_string",
                        query=self.query_string,
                        fields=["name._exact", "name._pattern"],
                        default_operator='and')

        private_files_query = Q(
            'bool',
            must=[
                Q({'term': {
                    '_index': files_index_name
                }}),
                Q({'term': {
                    'system._exact': system
                }}), storage_prefix_query, (ngram_query | match_query)
            ],
            must_not=[Q({"prefix": {
                "path._exact": "/.Trash"
            }})])

        return private_files_query
Example 36
def customer_index(app):
    """Initialize the `Customer` doc type."""
    test_index = Index(uuid4().hex)
    test_index.create()
    app.cluster.health(wait_for_status='yellow')

    # monkey patch `auth_index`
    original_auth_index = auth_models.auth_index
    auth_models.auth_index = test_index

    Customer.init(index=test_index._name)
    Customer._doc_type.index = test_index._name

    yield test_index

    auth_models.auth_index = original_auth_index
    test_index.delete()
Example 37
    def applyConfig(self):
        try:
            print("Connecting to '%s', index '%s'" %
                  (self.confESHost, self.confESIndex))
            res = connections.create_connection(hosts=[self.confESHost])
            idx = Index(self.confESIndex)
            idx.doc_type(DocHTTPRequestResponse)
            DocHTTPRequestResponse.init()
            try:
                idx.create()
            except Exception:
                pass  # index may already exist
        except Exception as e:
            JOptionPane.showMessageDialog(
                self.panel,
                "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>"
                % (str(e)), "Error", JOptionPane.ERROR_MESSAGE)
Example 38
    def handle(self, *args, **options):
        index = "kplc_interruptions"
        es = Elasticsearch(
            [{'host': settings.ES_SETTINGS['HOST'],
              'port': settings.ES_SETTINGS['PORT']}],
            index=index)
        kplc_index = Index(index, using=settings.ES_SETTINGS['ALIAS'])
        kplc_index.document(InterruptionPdfTextDoc)
        if kplc_index.exists():
            kplc_index.delete()
            # TODO: Use logger
            print('Deleted kplc interruptions index.')
        InterruptionPdfTextDoc.init()
        result = bulk(
            client=es,
            actions=(pdf.index()
                     for pdf in InterruptionPdfText.objects.all().iterator()))
        # TODO: Use logger
        print('Indexed kplc interruptions.')
        print(result)
Example 39
    def clean(self):
        cleaned_data = super().clean()
        param = cleaned_data.get("param")
        query = self.data['query']
        search_type = cleaned_data.get("search_type")

        if search_type == 'simple':
            try:
                elastic_field = SimpleSearchField.objects.get(
                    pk=param).elastic_index_field
                query = prepare_simple_query(query, elastic_field.field_type)
            except SimpleSearchField.DoesNotExist:
                raise forms.ValidationError("Невірний параметр запиту")  # "Invalid query parameter"
        else:
            inid_code = InidCodeSchedule.objects.filter(
                ipc_code=param,
                enable_search=1,
                elastic_index_field__isnull=False).first()
            if inid_code:
                elastic_field = inid_code.elastic_index_field
                query = prepare_advanced_query(query, elastic_field.field_type)
            else:
                raise forms.ValidationError("Невірний параметр запиту")  # "Invalid query parameter"

        # Validate the query against ElasticSearch
        client = Elasticsearch(settings.ELASTIC_HOST,
                               timeout=settings.ELASTIC_TIMEOUT)
        i = Index(settings.ELASTIC_INDEX_NAME, using=client).validate_query(
            body={
                'query':
                Q('query_string',
                  query=query,
                  default_field=elastic_field.field_name,
                  default_operator='AND').to_dict()
            })

        if not i['valid']:
            raise forms.ValidationError("Невірний запит")  # "Invalid query"
Example 40
    def construct_query(self, system, file_path=None):

        files_index_name = Index('des-files').get_alias().keys()[0]

        if system == settings.AGAVE_STORAGE_SYSTEM:
            storage_prefix_query = Q({'prefix': {'path._exact': '/' + self.username}})
        else:
            storage_prefix_query = Q({'prefix': {'path._exact': '/'}})

        private_files_query = Q(
            'bool',
            must=[
                Q({'term': {'_index': files_index_name}}),
                Q({'term': {'system._exact': system}}),
                storage_prefix_query,
                Q("query_string", query=self.query_string, default_operator="and")
            ],
            must_not=[
                Q({"prefix": {"path._exact": "/.Trash"}})
            ]
        )

        return private_files_query
Example 41
def create_index():
    try:
        create_connection()
        db = Index(INDEX_NAME)
        db.settings(**INDEX_SETTINGS)
        db.create()
    except Exception as e:
        ActivityLog.objects.create_log(
            None,
            level='C',
            view_name='elastic_search.es_core_config.create_index',
            message='Error in creating index in ElasticSearch with error message - %s' % e.message,
            traceback=traceback.format_exc())
        raise Exception(e)
    else:
        return db
Example 42
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Calculate statistics aggregations."""
        # If no events have been indexed there is nothing to aggregate
        if not Index(self.event_index, using=self.client).exists():
            return

        lower_limit = self.bookmark_api.get_lower_limit(start_date)

        # Stop here if no bookmark could be estimated.
        if lower_limit is None:
            return

        upper_limit = self.bookmark_api.get_upper_limit(
            start_date, end_date, self.batch_size)

        while upper_limit <= datetime.datetime.utcnow() and self.has_events:
            self.indices = set()

            bulk(self.client,
                 self.agg_iter(lower_limit, upper_limit),
                 stats_only=True,
                 chunk_size=50)
            # Flush all indices which have been modified
            current_search.flush_and_refresh(index='*')
            if update_bookmark and self.has_events:
                self.bookmark_api.set_bookmark(
                    upper_limit.strftime(self.doc_id_suffix)
                    or datetime.datetime.utcnow().strftime(self.doc_id_suffix))

            lower_limit = lower_limit + datetime.timedelta(self.batch_size)
            upper_limit = min(
                end_date or datetime.datetime.max,  # ignore if `None`
                datetime.datetime.utcnow().replace(microsecond=0),
                lower_limit + datetime.timedelta(self.batch_size))
            if lower_limit > upper_limit:
                break
Example 43
def create_index(client, *, index_name, **kwargs):
    p = IngestClient(client)
    p.put_pipeline(id='document_attachment',
                   body={
                       'description': "Extract attachment information",
                       'processors': [{
                           "attachment": {
                               "field": "source_file"
                           }
                       }]
                   })

    index = Index(index_name, using=client)
    index.doc_type(Document)
    try:
        index.create()
    except RequestError:
        print(f"Index named '{index_name}' already exists", file=sys.stderr)
        sys.exit(1)
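
Documents then need to be indexed through the pipeline for the attachment processor to fire. A hedged usage sketch, assuming Document has a source_file field carrying base64-encoded bytes and that the file name is a placeholder:

import base64

with open('report.pdf', 'rb') as fh:  # hypothetical input file
    doc = Document(source_file=base64.b64encode(fh.read()).decode())
doc.save(using=client, pipeline='document_attachment')  # route through the ingest pipeline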
Example 44
def create_indexes(names=None, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
        ["dmarc_aggregate", "dmarc_forensic"] by default
        settings (dict): Index settings

    """
    if names is None:
        names = ["dmarc_aggregate", "dmarc_forensic"]
    for name in names:
        index = Index(name)
        try:
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(e.__str__()))
Example 45
def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not copying
    over the `_analysis` attribute.
    """
    client = object()
    i = Index('my-index', using=client)

    random_analyzer_name = ''.join(
        (choice(string.ascii_letters) for _ in range(100)))
    random_analyzer = analyzer(random_analyzer_name,
                               tokenizer="standard",
                               filter="standard")

    i.analyzer(random_analyzer)

    i2 = i.clone('my-clone-index')

    assert (i.to_dict()['settings']['analysis'] ==
            i2.to_dict()['settings']['analysis'])
Example 46
    def mitm_request(self, data):
        # Initialize ES connection and index
        res = connections.create_connection(hosts=[args.elasticsearch])
        idx = Index(args.index)
        idx.doc_type(DocHTTPRequestResponse)
        try:
            DocHTTPRequestResponse.init()
            idx.create()
        except Exception:
            pass  # index may already exist

        r = HTTPRequest(data)

        # determine url
        if self.is_connect:
            scheme = "https"
        else:
            scheme = "http"
        url = scheme + "://" + self.hostname
        if scheme == "http" and int(
                self.port) != 80 or scheme == "https" and int(
                    self.port) != 443:
            url += ":" + self.port
        url += self.path

        if args.verbose:
            print(url)

        self.doc = DocHTTPRequestResponse(host=self.hostname,
                                          port=int(self.port),
                                          protocol=scheme)
        self.doc.meta.index = args.index
        self.doc.request.url = url
        self.doc.request.requestline = r.requestline
        self.doc.request.method = r.command
        self.doc.host = self.hostname
        self.doc.port = int(self.port)
        self.doc.protocol = scheme

        return data
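
Re-creating the connection and index on every intercepted request is wasteful; a hedged refactor would hoist the one-time setup to module scope, assuming args is parsed at import time:

# One-time setup at module import instead of per request.
connections.create_connection(hosts=[args.elasticsearch])
idx = Index(args.index)
idx.doc_type(DocHTTPRequestResponse)
try:
    DocHTTPRequestResponse.init()
    idx.create()
except Exception:
    pass  # index may already exist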
Example 47
    def register(self, name=None, version=None, settings=None):
        """
        Register an index locally.

        Note that `createall` is needed to save the index to Elasticsearch.

        The index will be named per convention such that:
         -  The graph's name is used by default
         -  The "test" suffix is added for unit testing (to avoid clobbering real data)

        If version is provided, it will be used to generate an alias (to the unversioned name).

        """
        if version is None:
            index_name = IndexRegistry.name_for(self.graph, name=name)
            alias_name = None
        else:
            # create index with full version, alias to shortened version
            index_name = IndexRegistry.name_for(self.graph,
                                                name=name,
                                                version=version)
            alias_name = IndexRegistry.name_for(self.graph, name=name)

        if index_name in self.indexes:
            raise Exception(
                "Index already registered for name: {}".format(index_name))

        index = Index(
            name=index_name,
            using=self.graph.elasticsearch_client,
        )

        if settings:
            index.settings(**settings)

        if alias_name is not None:
            index.aliases(**{alias_name: {}})

        self.indexes[index_name] = index
        return index
Example 48
    def test_es_create_documents(self):
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command('es_create_documents')

        # index_name not in settings.
        with self.assertRaises(SystemExit):
            call_command('es_create_documents', index_name='barfoo')

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command('es_create_documents', index_name='foobar')

        index = Index('foobar')
        doc_type = Token.get_es_doc_type()
        index.doc_type(doc_type)
        index.create()
        self.refresh()

        # Disable auto indexing while creating objects.
        settings.TRAMPOLINE['OPTIONS']['disabled'] = True
        token = Token.objects.create(name="token")
        token_not_indexable = Token.objects.create(name='not_indexable')
        token_raise_exception = Token.objects.create(name='raise_exception')
        settings.TRAMPOLINE['OPTIONS']['disabled'] = False

        # Dry run.
        call_command('es_create_documents', index_name='foobar', dry_run=True)
        self.assertDocDoesntExist(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)

        call_command('es_create_documents', index_name='foobar', verbosity=3)
        self.assertDocExists(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)
Example 49
def test_search_is_limited_to_index_name():
    i = Index('my-index')
    s = i.search()

    assert s._index == ['my-index']
Example 50
def exists():
    return Index(APIDoc.Index.name).exists()
Example 51
def refresh():

    index = Index(APIDoc.Index.name)
    index.refresh()
Example 52
def delete():
    Index(APIDoc.Index.name).delete()
Example 53
    # (Snippet truncated at the source; the opening below is a hypothetical
    # reconstruction of the method whose docstring tail survived.)
    def family_member_names(self):
        """
        Should return list of family member names
        """
        family = getattr(self.general, "family", None)
        if family:
            for member in family:
                if hasattr(member, "family_name"):
                    yield member.family_name
        else:
            for member in parse_raw_family_string(
                getattr(self.general, "family_raw", "")
            ):
                if "family_name" in member:
                    yield member["family_name"]


declarations_idx = Index(OLD_DECLARATION_INDEX)
declarations_idx.settings(
    number_of_shards=NUMBER_OF_SHARDS, number_of_replicas=NUMBER_OF_REPLICAS
)
declarations_idx.analyzer(namesAutocompleteAnalyzer)
declarations_idx.analyzer(namesAutocompleteSearchAnalyzer)


@declarations_idx.doc_type
class Declaration(DocType, AbstractDeclaration):
    """Declaration document.
    Assumes there's a dynamic mapping with all fields not indexed by default."""

    persons = Text(analyzer="ukrainian", copy_to="all")
    countries = Text(analyzer="ukrainian", copy_to="all")
    companies = Text(analyzer="ukrainian", copy_to="all")
Example 54
    def handle(self, *args, **options):
        text_analyzer = get_text_analyzer("german")
        elastic_index = Index("mst_debug")
        if not elastic_index.exists():
            elastic_index.create()
        elastic_index.close()
        elastic_index.analyzer(text_analyzer)
        elastic_index.save()
        elastic_index.open()
        elastic_index.flush()

        for word in options["words"]:
            analysis = elastic_index.analyze(body={
                "analyzer": "text_analyzer",
                "text": word
            })
            tokens = [i["token"] for i in analysis["tokens"]]
            self.stdout.write("{} {}\n".format(word, tokens))
Example 55
    def exists():
        return Index(INDEX_NAME).exists()
Example 56
    def delete():
        try:
            Index(INDEX_NAME).delete()
        except elasticsearch.exceptions.NotFoundError:
            log.info(
                'Could not delete non-existent index, creating new index...')
Example 57
def index_doc(doc, index_name="wiki-dumps"):
    global es
    index = Index(index_name)
    index.create()
Example 58
    def construct_query(self, system=None, file_path=None, **kwargs):
        project_query_fields = [
            "projectId", "title", "description", "doi", "publications", "pis",
            "name"
        ]
        published_index_name = list(
            Index(settings.ES_INDEX_PREFIX.format(
                'publications')).get_alias().keys())[0]
        legacy_index_name = list(
            Index(settings.ES_INDEX_PREFIX.format(
                'publications-legacy')).get_alias().keys())[0]
        filter_queries = []
        if kwargs.get('type_filters'):
            for type_filter in kwargs['type_filters']:
                if type_filter == 'nees':
                    type_query = Q({'term': {'_index': legacy_index_name}})
                else:
                    type_query = Q(
                        'term',
                        **{'project.value.projectType._exact': type_filter})
                filter_queries.append(type_query)

        ds_user_query = Q({
            "nested": {
                "path": "users",
                "ignore_unmapped": True,
                "query": {
                    "query_string": {
                        "query": self.query_string,
                        "fields": [
                            "users.first_name", "users.last_name",
                            "user.username"
                        ],
                        "lenient": True
                    }
                }
            }
        })
        nees_pi_query = Q({
            "nested": {
                "path": "pis",
                "ignore_unmapped": True,
                "query": {
                    "query_string": {
                        "query": self.query_string,
                        "fields": ["pis.firstName", "pis.lastName"],
                        "lenient": True
                    }
                }
            }
        })
        pub_query = Q('query_string',
                      query=self.query_string,
                      default_operator='and',
                      fields=project_query_fields)

        published_query = Q(
            'bool',
            must=[
                Q('bool', should=[ds_user_query, nees_pi_query, pub_query]),
                Q({'term': {
                    '_index': legacy_index_name
                }}),
            ],
            must_not=[
                Q('term', status='unpublished'),
                Q('term', status='saved')
            ])

        return published_query
Example 59
    def get(self, request):
        """GET handler."""
        q = request.GET.get('query_string')
        offset = int(request.GET.get('offset', 0))
        limit = int(request.GET.get('limit', 10))
        if limit > 500:
            return HttpResponseBadRequest("limit must not exceed 500")
        type_filter = request.GET.get('type_filter', 'all')

        doc_type_map = {
            Index(settings.ES_INDEX_PREFIX.format('publications')).get_alias().keys()[0]: 'publication',
            Index(settings.ES_INDEX_PREFIX.format('publications-legacy')).get_alias().keys()[0]: 'publication',
            Index(settings.ES_INDEX_PREFIX.format('files')).get_alias().keys()[0]: 'file',
            Index(settings.ES_INDEX_PREFIX.format('cms')).get_alias().keys()[0]: 'modelresult'
        }

        public_files_query = CommunityDataSearchManager(
            request).construct_query() | PublishedDataSearchManager(
                request).construct_query()
        publications_query = PublicationsSearchManager(
            request).construct_query()
        cms_query = es_query = CMSSearchManager(request).construct_query()

        if type_filter == 'public_files':
            es_query = Search().query(public_files_query)
        elif type_filter == 'published':
            es_query = Search().query(publications_query)
        elif type_filter == 'cms':
            es_query = Search().query(cms_query).highlight(
                'body',
                fragment_size=100).highlight_options(pre_tags=["<b>"],
                                                     post_tags=["</b>"],
                                                     require_field_match=False)
        elif type_filter == 'all':
            es_query = Search().query(public_files_query | publications_query
                                      | cms_query).highlight(
                                          'body',
                                          fragment_size=100).highlight_options(
                                              pre_tags=["<b>"],
                                              post_tags=["</b>"],
                                              require_field_match=False)

        try:
            res = es_query.execute()
        except (TransportError, ConnectionTimeout) as err:
            if getattr(err, 'status_code', 500) == 404:
                raise
            res = es_query.execute()

        out = {}
        hits = []

        for r in res:
            d = r.to_dict()
            d["doc_type"] = doc_type_map[r.meta.index]
            if hasattr(r.meta, 'highlight'):
                highlight = r.meta.highlight.to_dict()
                d["highlight"] = highlight
            if r.meta.doc_type == 'publication' and hasattr(r, 'users'):
                users = r.users
                pi = r.project.value.pi
                pi_user = filter(lambda x: x.username == pi, users)[0]
                d["piLabel"] = "{}, {}".format(pi_user.last_name,
                                               pi_user.first_name)
            hits.append(d)

        out['total_hits'] = res.hits.total.value
        out['hits'] = hits
        out['all_total'] = Search().query(public_files_query
                                          | publications_query
                                          | cms_query).count()
        out['public_files_total'] = Search().query(public_files_query).count()
        out['published_total'] = Search().query(publications_query).count()
        out['cms_total'] = Search().query(cms_query).count()

        return JsonResponse(out, safe=False)
Example 60
# coding:utf-8
'''
@author = super_fazai
@File    : search.py
@Time    : 2017/8/11 10:41
@connect : [email protected]
'''

from elasticsearch_dsl import (
    DocType,
    Index,
)
from scrapy import Field


class Post():
    id = Field()


posts = Index('posts')


@posts.doc_type
class PostDocument(DocType):
    class Meta:
        model = Post

        fields = [
            'id',
        ]
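
The @posts.doc_type decorator above only registers PostDocument locally; nothing exists in the cluster until the index is created. A hedged usage sketch, assuming a local node:

from elasticsearch_dsl.connections import connections

connections.create_connection(hosts=['localhost'])
posts.create()  # creates the 'posts' index with PostDocument's mapping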