コード例 #1
0
ファイル: elastic.py プロジェクト: nbs-system/nxtool
    def __init__(self, config_file='config.cfg'):
        """Load the [elastic] section of *config_file* and open an ES connection."""
        super(Elastic, self).__init__()

        self.percentage = 10.0
        self.minimum_occurrences = 250

        # ConfigParser cannot supply per-section defaults outside [DEFAULT],
        # so fill in any missing 'elastic' options by hand.
        parser = ConfigParser()
        parser.read(config_file)
        if not parser.has_section('elastic'):
            parser.add_section('elastic')

        defaults = {'use_ssl': 'True', 'host': '127.0.0.1', 'version': '2',
                    'index': 'nxapi', 'doc_type': 'events'}
        for key, fallback in defaults.items():
            if not parser.has_option('elastic', key):
                parser.set('elastic', key, fallback)

        self.version = parser.getint('elastic', 'version')
        self.index = parser.get('elastic', 'index')
        ssl_enabled = parser.getboolean('elastic', 'use_ssl')
        es_host = parser.get('elastic', 'host')
        self.doc_type = parser.get('elastic', 'doc_type')
        self.client = connections.create_connection(
            hosts=[es_host], use_ssl=ssl_enabled, index=self.index,
            version=self.version, doc_type=self.doc_type,
            timeout=30, retry_on_timeout=True)

        Event.init(index=self.index)
        index = Index(self.index, using=self.client)
        index.doc_type(Event)
        self.initialize_search()
コード例 #2
0
def test_registered_doc_type_included_in_search():
    """A document registered on an index is attached to searches built from it."""
    idx = Index('i', using='alias')
    idx.document(Post)

    search = idx.search()

    assert search._doc_type == [Post]
コード例 #3
0
def test_registered_doc_type_included_in_search():
    """A registered doc type appears in the doc-type map of a derived search."""
    idx = Index('i', using='alias')
    idx.doc_type(Post)

    search = idx.search()

    assert search._doc_type_map == {'post': Post}
コード例 #4
0
def es_delete_cmd(index_name):
    """Delete a specified index

    :arg index_name: name of index to delete

    """
    valid = [name for name, count in get_indexes()]

    if index_name not in valid:
        log.error('Index "%s" is not a valid index.', index_name)
        if valid:
            log.error('Valid indexes: %s', ', '.join(valid))
        else:
            log.error('There are no valid indexes.')
        return

    # Python 2 prompt; deletion only proceeds on the literal answer "yes".
    answer = raw_input(
        'Are you sure you want to delete "%s"? (yes/no) ' % index_name
    )
    if answer != 'yes':
        return

    log.info('Deleting index "%s"...', index_name)
    target = Index(name=index_name, using='default')
    try:
        target.delete()
    except NotFoundError:
        # Already gone -- treat as success.
        pass
    log.info('Done!')
コード例 #5
0
ファイル: search.py プロジェクト: Carlosedo/mixees
    def create_index_if_does_not_exist(cls):
        """Ensure the class's ES index exists, registering this doc type on it."""
        idx = Index(cls.INDEX_NAME)
        idx.doc_type(cls)

        if not idx.connection.indices.exists(cls.INDEX_NAME):
            idx.create()
            time.sleep(1)  # It takes some time to create the index
コード例 #6
0
def test_analyzers_returned_from_to_dict():
    """A registered custom analyzer shows up in Index.to_dict() settings."""
    name = ''.join(choice(string.ascii_letters) for _ in range(100))
    custom = analyzer(name, tokenizer="standard", filter="standard")
    idx = Index('i', using='alias')
    idx.analyzer(custom)

    expected = {"filter": ["standard"], "type": "custom", "tokenizer": "standard"}
    assert idx.to_dict()["settings"]["analysis"]["analyzer"][name] == expected
コード例 #7
0
def test_index_template_can_have_order():
    """as_template() forwards the template ``order`` into its dict output."""
    idx = Index('i-*')
    template = idx.as_template('i', order=2)

    assert template.to_dict() == {
        "index_patterns": ["i-*"],
        "order": 2,
    }
コード例 #8
0
def test_aliases_returned_from_to_dict():
    """Aliases registered on an Index appear in its to_dict() output."""
    name = ''.join(choice(string.ascii_letters) for _ in range(100))
    aliases = {name: {}}

    idx = Index('i', using='alias')
    idx.aliases(**aliases)

    assert idx._aliases == idx.to_dict()['aliases'] == aliases
コード例 #9
0
ファイル: utils.py プロジェクト: craig5/warehouse
def get_index(name, doc_types, *, using, shards=1, replicas=0, interval="1s"):
    """Build an Index with the given doc types and shard/replica settings."""
    idx = Index(name, using=using)
    for dt in doc_types:
        idx.doc_type(dt)
    idx.settings(
        number_of_shards=shards,
        number_of_replicas=replicas,
        refresh_interval=interval,
    )
    return idx
コード例 #10
0
def test_aliases_add_to_object():
    """aliases() stores the given mapping on the Index object."""
    name = ''.join(choice(string.ascii_letters) for _ in range(100))
    aliases = {name: {}}

    idx = Index('i', using='alias')
    idx.aliases(**aliases)

    assert idx._aliases == aliases
コード例 #11
0
def test_index_can_be_saved_even_with_settings(write_client):
    """save() can be called repeatedly, updating settings in place."""
    idx = Index('test-blog', using=write_client)
    idx.settings(number_of_shards=3, number_of_replicas=0)
    idx.save()
    idx.settings(number_of_replicas=1)
    idx.save()

    live = idx.get_settings()['test-blog']['settings']['index']
    assert live['number_of_replicas'] == '1'
コード例 #12
0
    def initialize_index(self, delete_if_exists=False):
        """
        Initialize index with mapping in ElasticSearch

        :param delete_if_exists: delete index, if exists
        :return: None
        """

        def update_index_settings():
            """
            Function updates settings for slovenian lemmatization of words.
            As far as we know, elasticsearch-dsl library does not support
            custom filter settings.

            :return: None
            """
            # Custom analyzer: lemmagen lemmatizer with the Slovenian ("sl")
            # lexicon, applied after a URL/email-aware tokenizer, lowercased.
            analysis_settings = {
                "analysis": {
                    "filter": {
                        "lemmagen_filter_sl": {
                            "type": "lemmagen",
                            "lexicon": "sl"
                        }
                    },
                    "analyzer": {
                        "lemmagen_sl": {
                            "type": "custom",
                            "tokenizer": "uax_url_email",
                            "filter": [
                                "lemmagen_filter_sl",
                                "lowercase"
                            ]
                        }
                    }
                }
            }
            # Analysis settings can only change while the index is closed:
            # wait for the cluster, close, push the settings, reopen.
            self.client.cluster.health(index=self.index_name,
                                       wait_for_status='green',
                                       request_timeout=2)
            self.client.indices.close(index=self.index_name)
            self.client.indices.put_settings(json.dumps(analysis_settings),
                                             index=self.index_name)
            self.client.indices.open(index=self.index_name)

        index = Index(self.index_name, using=self.client)
        if delete_if_exists and index.exists():
            index.delete()

        index.settings(
            # use higher number in production
            number_of_replicas=0
        )

        # register models
        index.doc_type(Document)
        index.create()
        update_index_settings()  # set lemmanizer
コード例 #13
0
 def run(self, *args, **options):
     """Ask for confirmation, then drop the configured index (unless dry-run)."""
     self.confirm(
         u"Are you really sure you want to delete the index '{0}' ?"
         .format(self.index_name)
     )
     target = Index(self.index_name)
     if not self.dry_run:
         target.delete()
     self.print_success(u"Index {0} deleted.".format(self.index_name))
コード例 #14
0
ファイル: initializees.py プロジェクト: mfournier/v6_api
def drop_index(silent=True):
    """Remove the ElasticSearch index.
    """
    try:
        Index(elasticsearch_config['index']).delete()
    except Exception as exc:
        # Best-effort by default; only surface the failure when asked to.
        if not silent:
            raise exc
コード例 #15
0
ファイル: api.py プロジェクト: HEPData/hepdata3
    def recreate_index(self):
        """ Delete and then create a given index and set a default mapping.

        :param index: [string] name of the index. If None a default is used
        """
        idx = Index(self.index)
        idx.delete(ignore=404)  # a missing index is not an error here

        ESSubmission.init()
コード例 #16
0
def test_delete(write_client):
    """Index.delete() removes the underlying ES index."""
    write_client.indices.create(
        index='test-index',
        body={'settings': {'number_of_replicas': 0, 'number_of_shards': 1}}
    )

    idx = Index('test-index', using=write_client)
    idx.delete()

    assert not write_client.indices.exists(index='test-index')
コード例 #17
0
ファイル: base.py プロジェクト: CSIS-iLab/new-silk-road
class BaseSearchTestCase(TestCase):
    """Creates a dedicated test index and registers all project doc types."""

    def setUp(self):
        from django.conf import settings
        SEARCH = getattr(settings, 'SEARCH')

        connections.create_connection('testing', **SEARCH['default']['connections'])
        self.index = Index(SEARCH['default']['index'], using='testing')

        # This is needed for test_documents, but has side effects in all running tests
        for name, value in inspect.getmembers(documents):
            is_doctype = (not name.startswith('_')
                          and inspect.isclass(value)
                          and issubclass(value, DocType)
                          and name != DocType.__name__)
            if not is_doctype:
                continue
            # Remove assigned index
            value._doc_type.index = None
            # Associate docs with test index
            self.index.doc_type(value)

        if self.index.exists():
            self.index.delete(ignore=404)
        self.index.create()

        self.search = Search(index=SEARCH['default']['index'])

    def tearDown(self):
        """Drop the test index and empty the background job queue."""
        self.index.delete()
        queue = django_rq.get_queue()
        queue.empty()
コード例 #18
0
def test_cloned_index_has_copied_settings_and_using():
    """clone() deep-copies settings and keeps the same connection object."""
    conn = object()
    original = Index('my-index', using=conn)
    original.settings(number_of_shards=1)

    cloned = original.clone('my-other-index')

    assert cloned._name == 'my-other-index'
    assert cloned._using is conn
    assert original._settings == cloned._settings
    assert original._settings is not cloned._settings
コード例 #19
0
    def test_create_index_manually(self):
        """The create_index command builds a named index that can be deleted."""
        out = io.StringIO()
        index_name = 'test_manually_created_index'
        call_command('create_index', index_name, stdout=out)
        self.assertIn("Created search index '{}'".format(index_name), out.getvalue())

        idx = Index(index_name)
        self.assertTrue(idx.exists())

        idx.delete()
        self.assertFalse(idx.exists())
コード例 #20
0
def test_registered_doc_type_included_in_to_dict():
    """Registering a document contributes its mapping to Index.to_dict()."""
    idx = Index('i', using='alias')
    idx.document(Post)

    expected = {
        'mappings': {
            'properties': {
                'title': {'type': 'text'},
                'published_from': {'type': 'date'},
            }
        }
    }
    assert idx.to_dict() == expected
コード例 #21
0
    def test_create_index_usings_settings(self):
        """Without arguments, create_index builds the indices from settings."""
        out = io.StringIO()
        call_command('create_index', stdout=out)

        output = out.getvalue()
        self.assertIn("Creating search indices from settings", output)
        self.assertIn("Created search index '{}'".format(self.settings['default']['index']), output)

        idx = Index(self.settings['default']['index'])
        self.assertTrue(idx.exists())

        idx.delete()
        self.assertFalse(idx.exists())
コード例 #22
0
def test_conflicting_mapping_raises_error_in_index_to_dict():
    """Two documents mapping the same field differently cannot be merged."""
    class TextDoc(document.Document):
        name = field.Text()

    class KeywordDoc(document.Document):
        name = field.Keyword()

    idx = Index('i')
    idx.document(TextDoc)
    idx.document(KeywordDoc)

    with raises(ValueError):
        idx.to_dict()
コード例 #23
0
ファイル: ElasticBurp.py プロジェクト: Cyber-Forensic/WASE
 def applyConfig(self):
     """Open an ES connection and ensure the configured index and mapping exist."""
     try:
         print("Connecting to '%s', index '%s'" % (self.confESHost, self.confESIndex))
         res = connections.create_connection(hosts=[self.confESHost])
         idx = Index(self.confESIndex)
         idx.doc_type(DocHTTPRequestResponse)
         DocHTTPRequestResponse.init()
         try:
             idx.create()
         except:
             # NOTE(review): bare except presumably means "index already
             # exists", but it also hides real connection/mapping errors.
             pass
     except Exception as e:
         JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>" % (str(e)), "Error", JOptionPane.ERROR_MESSAGE)
コード例 #24
0
ファイル: publish.py プロジェクト: densho/encyc-core
def create_index(hosts, index):
    i = set_hosts_index(hosts=hosts, index=index)
    logprint('debug', 'creating new index')
    i = Index(index)
    i.create()
    logprint('debug', 'registering doc types')
    i.doc_type(Author)
    i.doc_type(Page)
    i.doc_type(Source)
    logprint('debug', 'DONE')
コード例 #25
0
    def test_es_create_documents(self):
        """End-to-end checks of the es_create_documents management command."""
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command('es_create_documents')

        # index_name not in settings.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_documents',
                index_name='barfoo'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_documents',
                index_name='foobar'
            )

        # Create the target index and register the Token doc type on it.
        index = Index('foobar')
        doc_type = Token.get_es_doc_type()
        index.doc_type(doc_type)
        index.create()
        self.refresh()

        # Disable auto indexing while creating objects.
        settings.TRAMPOLINE['OPTIONS']['disabled'] = True
        token = Token.objects.create(name="token")
        token_not_indexable = Token.objects.create(name='not_indexable')
        token_raise_exception = Token.objects.create(name='raise_exception')
        settings.TRAMPOLINE['OPTIONS']['disabled'] = False

        # Dry run: nothing should be written to the index.
        call_command(
            'es_create_documents',
            index_name='foobar',
            dry_run=True
        )
        self.assertDocDoesntExist(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)

        # Real run: only the plain indexable token should be written.
        call_command(
            'es_create_documents',
            index_name='foobar',
            verbosity=3
        )
        self.assertDocExists(token)
        self.assertDocDoesntExist(token_not_indexable)
        self.assertDocDoesntExist(token_raise_exception)
コード例 #26
0
    def test_es_create_alias(self):
        """End-to-end checks of the es_create_alias management command."""
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                target_name='foobar_target'
            )

        # Target name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar',
                target_name='foobar_target'
            )

        # Create the target index so aliasing can succeed below.
        index = Index('foobar_target')
        index.create()
        self.refresh()

        # Alias with same name as index.
        with self.assertRaises(SystemExit):
            call_command(
                'es_create_alias',
                index_name='foobar_target',
                target_name='foobar_target'
            )

        # Dry run: the alias must not actually be created.
        call_command(
            'es_create_alias',
            index_name='foobar',
            target_name='foobar_target',
            dry_run=True
        )
        self.assertAliasDoesntExist(index='foobar_target', name='foobar')

        # Real run: the alias is created.
        call_command(
            'es_create_alias',
            index_name='foobar',
            target_name='foobar_target'
        )
        self.assertAliasExists(index='foobar_target', name='foobar')
コード例 #27
0
def test_registered_doc_type_included_in_to_dict():
    """doc_type() binds the document to the index and exposes its mapping."""
    idx = Index('i', using='alias')
    idx.doc_type(Post)

    assert Post._doc_type.index == 'i'
    expected = {
        'mappings': {
            'post': {
                'properties': {
                    'title': {'type': 'string'},
                    'published_from': {'type': 'date'},
                }
            }
        }
    }
    assert idx.to_dict() == expected
コード例 #28
0
ファイル: ElasticBurp.py プロジェクト: kartikeyap/WASE
    def registerExtenderCallbacks(self, callbacks):
        """Burp extension entry point: register listeners and set up ES storage."""
        self.callbacks = callbacks
        self.helpers = callbacks.getHelpers()
        callbacks.setExtensionName("Storing HTTP Requests/Responses into ElasticSearch")
        self.callbacks.registerHttpListener(self)
        self.callbacks.registerContextMenuFactory(self)
        self.out = callbacks.getStdout()

        # Connect to ES and ensure the index with our doc type exists.
        res = connections.create_connection(hosts=[ES_host])
        idx = Index(ES_index)
        idx.doc_type(DocHTTPRequestResponse)
        try:
            idx.create()
        except:
            # NOTE(review): bare except assumes the only failure mode is an
            # existing index; other ES errors are misreported here.
            print("Index already exists")
コード例 #29
0
def test_doc_type_can_be_set():
    """An explicit doc_type name becomes the mapping key in to_dict()."""
    idx = Index('i', doc_type='t')
    mapping = Mapping('t')
    mapping.field('title', Text())
    idx.mapping(mapping)

    expected = {
        'mappings': {
            't': {
                'properties': {
                    'title': {'type': 'text'}
                }
            }
        }
    }
    assert idx.to_dict() == expected
コード例 #30
0
    def test_es_delete_alias(self):
        """End-to-end checks of the es_delete_alias management command."""
        # Index name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                target_name='foobar_target'
            )

        # Target name required.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar'
            )

        # Index doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar',
                target_name='foobar_target',
                yes=True
            )

        # Create the target index so only the alias is missing below.
        index = Index('foobar_target')
        index.create()
        self.refresh()

        # Alias doesn't exist.
        with self.assertRaises(SystemExit):
            call_command(
                'es_delete_alias',
                index_name='foobar',
                target_name='foobar_target',
                yes=True
            )

        # Create the alias directly, then delete it through the command.
        trampoline_config.connection.indices.put_alias(
            index='foobar_target', name='foobar')
        self.assertAliasExists(index='foobar_target', name='foobar')

        call_command(
            'es_delete_alias',
            index_name='foobar',
            target_name='foobar_target',
            yes=True
        )
        self.assertAliasDoesntExist(index='foobar_target', name='foobar')
コード例 #31
0
ファイル: aggregations.py プロジェクト: slint/invenio-stats
 def run(self, start_date=None, end_date=None, update_bookmark=True):
     """Calculate statistics aggregations."""
     # If no events have been indexed there is nothing to aggregate
     if not Index(self.event_index, using=self.client).exists():
         return
     lower_limit = start_date or self.get_bookmark()
     # Stop here if no bookmark could be estimated.
     if lower_limit is None:
         return
     # Upper bound of the first batch: the earliest of the requested end
     # date, "now" (seconds precision), and one batch past the lower bound.
     upper_limit = min(
         end_date or datetime.datetime.max,  # ignore if `None`
         datetime.datetime.utcnow().replace(microsecond=0),
         datetime.datetime.combine(
             lower_limit + datetime.timedelta(self.batch_size),
             datetime.datetime.min.time())
     )
     # Aggregate batch_size-day windows until we catch up to "now".
     while upper_limit <= datetime.datetime.utcnow():
         self.indices = set()
         self.new_bookmark = upper_limit.strftime(self.doc_id_suffix)
         bulk(self.client,
              self.agg_iter(lower_limit, upper_limit),
              stats_only=True,
              chunk_size=50)
         # Flush all indices which have been modified
         current_search_client.indices.flush(
             index=','.join(self.indices),
             wait_if_ongoing=True
         )
         if update_bookmark:
             self.set_bookmark()
         self.indices = set()
         # Advance the window by one batch.
         lower_limit = lower_limit + datetime.timedelta(self.batch_size)
         upper_limit = min(
             end_date or datetime.datetime.max,  # ignore if `None`
             datetime.datetime.utcnow().replace(microsecond=0),
             lower_limit + datetime.timedelta(self.batch_size)
         )
         if lower_limit > upper_limit:
             break
コード例 #32
0
def create_indexes(names, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
        settings (dict): Index settings

    Raises:
        ElasticsearchError: if an index cannot be inspected or created
    """
    for name in names:
        index = Index(name)
        try:
            # Only create indexes that do not exist yet; apply settings first
            # so the index is created with them.
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            # str(e) is the idiomatic spelling of e.__str__()
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(str(e)))
コード例 #33
0
class ElasticSearch:
    """Search and bulk-index MerchantServices documents in Elasticsearch."""

    # NOTE: double-underscore names are name-mangled class attributes shared
    # by all instances; the client and Index are created at import time.
    __logger = logging
    __client = Elasticsearch()
    __index_name = "merchant_services"
    __doc_type = MerchantServicesDocument
    __index = Index(__index_name)

    def search(self, query):
        """Match *query* against the ``terms`` field and return the
        corresponding MerchantServices Django objects."""
        self.__logger.debug("in: {}".format(query))
        s = Search(using=self.__client,
                   index=self.__index_name,
                   doc_type=self.__doc_type)
        # q = Q('match', stem_terms=query)
        q = Q('match', terms=query)
        s = s.query(q)
        responses = s.execute()

        # Resolve each ES hit back to its database row via native_id.
        documents = []
        if responses.success():
            for hit in responses.hits:
                documents.append(
                    MerchantServices.objects.get(native_id=hit.native_id))

        self.__logger.debug("out: {}".format([
            'id: {}, name: {}'.format(doc.get_native_id(), doc.get_name())
            for doc in documents
        ]))
        return documents

    def bulk_indexing(self):
        """Recreate the index and bulk-index every MerchantServices row."""
        self.__index.delete(ignore=404)
        self.__index.create()
        # Close the index while re-initializing the mapping, then reopen.
        self.__client.indices.close(index=self.__index_name)
        self.__doc_type.init()
        self.__client.indices.open(index=self.__index_name)

        bulk(client=self.__client,
             actions=(b.indexing()
                      for b in MerchantServices.objects.all().iterator()))
コード例 #34
0
    def handle(self, *args, **options):
        """Create the 'kindle2' ES index with a fixed book-metadata mapping."""
        from elasticsearch import Elasticsearch
        from elasticsearch_dsl import Index, Mapping
        ELASTICSEARCH_HOSTS = ['localhost']
        ELASTICSEARCH_INDEX = 'kindle2'

        es = Elasticsearch(ELASTICSEARCH_HOSTS)
        newindex = Index(ELASTICSEARCH_INDEX, using=es)
        if newindex.exists():
            exit('index already exists,change a new name')

        mapping = Mapping()
        # Free-text fields, then the exact-match ASIN key.
        for text_field in ('title', 'creator', 'publisher', 'iclass', 'isbn'):
            mapping.field(text_field, 'text')
        mapping.field('asin', 'keyword')

        newindex.mapping(mapping)
        newindex.create()
コード例 #35
0
ファイル: private_data.py プロジェクト: owaisj/portal
    def construct_query(self, system, file_path=None):
        """Build the ES query restricting hits to this user's private files."""
        files_index_name = Index(
            settings.ES_INDEX_PREFIX.format('files')).get_alias().keys()[0]

        # Agave storage is scoped to the user's home prefix; otherwise
        # everything under the root path is in scope.
        if system == settings.AGAVE_STORAGE_SYSTEM:
            path_prefix = '/' + self.username
        else:
            path_prefix = '/'
        storage_prefix_query = Q({'prefix': {'path._exact': path_prefix}})

        # Fuzzy ngram match on the name, OR an exact/pattern match.
        ngram_query = Q("query_string",
                        query=self.query_string,
                        fields=["name"],
                        minimum_should_match='80%',
                        default_operator='or')
        match_query = Q("query_string",
                        query=self.query_string,
                        fields=["name._exact", "name._pattern"],
                        default_operator='and')

        return Q(
            'bool',
            must=[
                Q({'term': {'_index': files_index_name}}),
                Q({'term': {'system._exact': system}}),
                storage_prefix_query,
                ngram_query | match_query,
            ],
            must_not=[Q({"prefix": {"path._exact": "/.Trash"}})],
        )
コード例 #36
0
def customer_index(app):
    """Initialize the `Customer` doc type."""
    test_index = Index(uuid4().hex)
    test_index.create()
    app.cluster.health(wait_for_status='yellow')

    # monkey patch `auth_index`, restoring the original after the test
    saved_index = auth_models.auth_index
    auth_models.auth_index = test_index

    Customer.init(index=test_index._name)
    Customer._doc_type.index = test_index._name

    yield test_index

    auth_models.auth_index = saved_index
    test_index.delete()
コード例 #37
0
 def applyConfig(self):
     """Open an ES connection and ensure the configured index and mapping exist."""
     try:
         print("Connecting to '%s', index '%s'" %
               (self.confESHost, self.confESIndex))
         res = connections.create_connection(hosts=[self.confESHost])
         idx = Index(self.confESIndex)
         idx.doc_type(DocHTTPRequestResponse)
         DocHTTPRequestResponse.init()
         try:
             idx.create()
         except:
             # NOTE(review): bare except presumably means "index already
             # exists", but it also hides real connection/mapping errors.
             pass
     except Exception as e:
         JOptionPane.showMessageDialog(
             self.panel,
             "<html><p style='width: 300px'>Error while initializing ElasticSearch: %s</p></html>"
             % (str(e)), "Error", JOptionPane.ERROR_MESSAGE)
コード例 #38
0
 def handle(self, *args, **options):
     """Rebuild the kplc_interruptions index from InterruptionPdfText rows."""
     index = "kplc_interruptions"
     es = Elasticsearch([{
         'host': settings.ES_SETTINGS['HOST'],
         'port': settings.ES_SETTINGS['PORT']
     }],
                        index=index)
     kplc_index = Index(index, using=settings.ES_SETTINGS['ALIAS'])
     kplc_index.document(InterruptionPdfTextDoc)
     # Drop any existing index so the rebuild starts from scratch.
     if kplc_index.exists():
         kplc_index.delete()
         # TODO: Use logger
         print('Deleted kplc interruptions index.')
     InterruptionPdfTextDoc.init()
     # Stream every PDF-text row through the bulk helper.
     result = bulk(
         client=es,
         actions=(pdf.index()
                  for pdf in InterruptionPdfText.objects.all().iterator()))
     # TODO: Use logger
     print('Indexed kplc interruptions.')
     print(result)
コード例 #39
0
    def clean(self):
        """Validate the search form and the resulting ES query string."""
        cleaned_data = super().clean()
        param = cleaned_data.get("param")
        query = self.data['query']
        search_type = cleaned_data.get("search_type")

        if search_type == 'simple':
            try:
                elastic_field = SimpleSearchField.objects.get(
                    pk=param).elastic_index_field
                query = prepare_simple_query(query, elastic_field.field_type)
            except SimpleSearchField.DoesNotExist:
                # Message is Ukrainian for "Invalid query parameter".
                raise forms.ValidationError("Невірний параметр запиту")
        else:
            # Advanced search: param is an IPC code that must be searchable.
            inid_code = InidCodeSchedule.objects.filter(
                ipc_code=param,
                enable_search=1,
                elastic_index_field__isnull=False).first()
            if inid_code:
                elastic_field = inid_code.elastic_index_field
                query = prepare_advanced_query(query, elastic_field.field_type)
            else:
                raise forms.ValidationError("Невірний параметр запиту")

        # Validate the query against ElasticSearch
        client = Elasticsearch(settings.ELASTIC_HOST,
                               timeout=settings.ELASTIC_TIMEOUT)
        i = Index(settings.ELASTIC_INDEX_NAME, using=client).validate_query(
            body={
                'query':
                Q('query_string',
                  query=query,
                  default_field=elastic_field.field_name,
                  default_operator='AND').to_dict()
            })

        # Message is Ukrainian for "Invalid query".
        if not i['valid']:
            raise forms.ValidationError("Невірний запит")
コード例 #40
0
    def construct_query(self, system, file_path=None):
        """Build the query limiting search hits to this user's private files."""
        files_index_name = Index('des-files').get_alias().keys()[0]

        # Agave storage is scoped to the user's home prefix; otherwise
        # everything under the root path is in scope.
        if system == settings.AGAVE_STORAGE_SYSTEM:
            prefix_path = '/' + self.username
        else:
            prefix_path = '/'
        storage_prefix_query = Q({'prefix': {'path._exact': prefix_path}})

        return Q(
            'bool',
            must=[
                Q({'term': {'_index': files_index_name}}),
                Q({'term': {'system._exact': system}}),
                storage_prefix_query,
                Q("query_string", query=self.query_string, default_operator="and"),
            ],
            must_not=[
                Q({"prefix": {"path._exact": "/.Trash"}})
            ],
        )
コード例 #41
0
def create_index():
    """Create the configured ES index, logging and re-raising on failure.

    Returns:
        Index: the created index object on success.

    Raises:
        Exception: wrapping the original error after it has been logged.
    """
    try:
        create_connection()
        db = Index(INDEX_NAME)
        db.settings(**INDEX_SETTINGS)
        db.create()
    except Exception as e:
        # BUG FIX: Exception has no `.message` attribute on Python 3 (and on
        # most exceptions generally), which made this handler raise a second
        # AttributeError; use str(e) instead.
        ActivityLog.objects.create_log(
            None,
            level='C',
            view_name='elastic_search.es_core_config.create_index',
            message=
            'Error in creating index in ElasticSearch with error message - %s'
            % str(e),
            traceback=traceback.format_exc())
        raise Exception(e)
    else:
        return db
コード例 #42
0
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Calculate statistics aggregations."""
        # If no events have been indexed there is nothing to aggregate
        if not Index(self.event_index, using=self.client).exists():
            return

        lower_limit = self.bookmark_api.get_lower_limit(start_date)

        # Stop here if no bookmark could be estimated.
        if lower_limit is None:
            return

        upper_limit = self.bookmark_api.get_upper_limit(
            start_date, end_date, self.batch_size)

        # Aggregate batch_size-day windows until we catch up to "now" or
        # run out of events.
        while upper_limit <= datetime.datetime.utcnow() and self.has_events:
            self.indices = set()

            bulk(self.client,
                 self.agg_iter(lower_limit, upper_limit),
                 stats_only=True,
                 chunk_size=50)
            # Flush all indices which have been modified
            current_search.flush_and_refresh(index='*')
            if update_bookmark and self.has_events:
                self.bookmark_api.set_bookmark(
                    upper_limit.strftime(self.doc_id_suffix)
                    or datetime.datetime.utcnow().strftime(self.doc_id_suffix))

            # Advance the window by one batch.
            lower_limit = lower_limit + datetime.timedelta(self.batch_size)
            upper_limit = min(
                end_date or datetime.datetime.max,  # ignore if `None`
                datetime.datetime.utcnow().replace(microsecond=0),
                lower_limit + datetime.timedelta(self.batch_size))
            if lower_limit > upper_limit:
                break
コード例 #43
0
def create_index(client, *, index_name, **kwargs):
    """Set up the attachment ingest pipeline and create *index_name*."""
    ingest = IngestClient(client)
    pipeline_body = {
        'description': "Extract attachment information",
        'processors': [{
            "attachment": {
                "field": "source_file"
            }
        }]
    }
    ingest.put_pipeline(id='document_attachment', body=pipeline_body)

    index = Index(index_name, using=client)
    index.doc_type(Document)
    try:
        index.create()
    except RequestError:
        # An existing index is a fatal configuration error for this tool.
        print(f"Index named '{index_name}' already exists", file=sys.stderr)
        sys.exit(1)
コード例 #44
0
def create_indexes(names=None, settings=None):
    """
    Create Elasticsearch indexes

    Args:
        names (list): A list of index names
        ["dmarc_aggregate", "dmarc_forensic"] by default
        settings (dict): Index settings

    Raises:
        ElasticsearchError: if an index cannot be inspected or created
    """
    if names is None:
        names = ["dmarc_aggregate", "dmarc_forensic"]
    for name in names:
        index = Index(name)
        try:
            # Only create indexes that do not exist yet; apply settings first
            # so the index is created with them.
            if not index.exists():
                logger.debug("Creating Elasticsearch index: {0}".format(name))
                if settings:
                    index.put_settings(settings)
                index.create()
        except Exception as e:
            # str(e) is the idiomatic spelling of e.__str__()
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(str(e)))
コード例 #45
0
def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582 in which `Index.clone()` was not copying
    over the `_analysis` attribute.
    """
    client = object()
    original = Index('my-index', using=client)

    # Use a random 100-letter analyzer name so the comparison below
    # cannot succeed by accident.
    random_name = ''.join(choice(string.ascii_letters) for _ in range(100))
    original.analyzer(analyzer(random_name,
                               tokenizer="standard",
                               filter="standard"))

    cloned = original.clone('my-clone-index')

    assert (original.to_dict()['settings']['analysis']
            == cloned.to_dict()['settings']['analysis'])
コード例 #46
0
    def mitm_request(self, data):
        """Index an intercepted HTTP request into Elasticsearch and forward it.

        Parses *data* as an HTTP request, records host/port/scheme/url on a
        ``DocHTTPRequestResponse`` stored at ``self.doc``, and returns the
        raw data unchanged so the proxy can pass it along.
        """
        # Initialize ES connection and index.
        connections.create_connection(hosts=[args.elasticsearch])
        idx = Index(args.index)
        idx.doc_type(DocHTTPRequestResponse)
        try:
            DocHTTPRequestResponse.init()
            idx.create()
        except Exception:
            # Best effort: the index usually already exists after the first
            # request.  Was a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit.
            pass

        r = HTTPRequest(data)

        # Determine the URL; include the port only when it is non-default
        # for the scheme (80 for http, 443 for https).
        if self.is_connect:
            scheme = "https"
        else:
            scheme = "http"
        url = scheme + "://" + self.hostname
        if scheme == "http" and int(
                self.port) != 80 or scheme == "https" and int(
                    self.port) != 443:
            url += ":" + self.port
        url += self.path

        if args.verbose:
            print(url)

        self.doc = DocHTTPRequestResponse(host=self.hostname,
                                          port=int(self.port),
                                          protocol=scheme)
        self.doc.meta.index = args.index
        self.doc.request.url = url
        self.doc.request.requestline = r.requestline
        self.doc.request.method = r.command
        self.doc.host = self.hostname
        self.doc.port = int(self.port)
        self.doc.protocol = scheme

        return data
コード例 #47
0
    def register(self, name=None, version=None, settings=None):
        """
        Register an index locally.

        Note that `createall` is needed to save the index to Elasticsearch.

        The index will be named per convention such that:
         -  The graph's name is used by default
         -  The "test" suffix is added for unit testing (to avoid clobbering real data)

        If version is provided, it will be used to create generate an alias (to the unversioned name).

        """
        alias_name = None
        if version is None:
            index_name = IndexRegistry.name_for(self.graph, name=name)
        else:
            # Versioned index; the alias points at the unversioned name.
            index_name = IndexRegistry.name_for(
                self.graph, name=name, version=version)
            alias_name = IndexRegistry.name_for(self.graph, name=name)

        if index_name in self.indexes:
            raise Exception(
                "Index already registered for name: {}".format(index_name))

        index = Index(name=index_name, using=self.graph.elasticsearch_client)

        if settings:
            index.settings(**settings)
        if alias_name is not None:
            index.aliases(**{alias_name: {}})

        self.indexes[index_name] = index
        return index
コード例 #48
0
    def test_es_create_documents(self):
        """Exercise the ``es_create_documents`` management command."""
        # A missing index name, an unknown one, and one whose index does
        # not yet exist must all abort with SystemExit.
        for bad_kwargs in ({}, {'index_name': 'barfoo'},
                           {'index_name': 'foobar'}):
            with self.assertRaises(SystemExit):
                call_command('es_create_documents', **bad_kwargs)

        index = Index('foobar')
        index.doc_type(Token.get_es_doc_type())
        index.create()
        self.refresh()

        # Create the objects with auto indexing switched off.
        settings.TRAMPOLINE['OPTIONS']['disabled'] = True
        indexed = Token.objects.create(name="token")
        skipped = Token.objects.create(name='not_indexable')
        failing = Token.objects.create(name='raise_exception')
        settings.TRAMPOLINE['OPTIONS']['disabled'] = False

        # A dry run must not create any documents.
        call_command('es_create_documents', index_name='foobar', dry_run=True)
        for obj in (indexed, skipped, failing):
            self.assertDocDoesntExist(obj)

        # A real run indexes only the indexable, non-failing object.
        call_command('es_create_documents', index_name='foobar', verbosity=3)
        self.assertDocExists(indexed)
        self.assertDocDoesntExist(skipped)
        self.assertDocDoesntExist(failing)
コード例 #49
0
def test_search_is_limited_to_index_name():
    """A Search built from an Index is scoped to that index's name."""
    search = Index('my-index').search()

    assert search._index == ['my-index']
コード例 #50
0
def exists():
    """Return True if the APIDoc index exists in Elasticsearch."""
    index = Index(APIDoc.Index.name)
    return index.exists()
コード例 #51
0
def refresh():
    """Refresh the APIDoc index so recent writes become searchable."""
    Index(APIDoc.Index.name).refresh()
コード例 #52
0
def delete():
    """Drop the APIDoc index entirely."""
    index = Index(APIDoc.Index.name)
    index.delete()
コード例 #53
0
        Should return list of family member names
        """
        family = getattr(self.general, "family", None)
        if family:
            for member in family:
                if hasattr(member, "family_name"):
                    yield member.family_name
        else:
            for member in parse_raw_family_string(
                getattr(self.general, "family_raw", "")
            ):
                if "family_name" in member:
                    yield member["family_name"]


# Module-level index definition for declaration documents; doc types are
# attached below via the @declarations_idx.doc_type decorator.
declarations_idx = Index(OLD_DECLARATION_INDEX)
declarations_idx.settings(
    number_of_shards=NUMBER_OF_SHARDS, number_of_replicas=NUMBER_OF_REPLICAS
)
# Both the index-time and search-time autocomplete analyzers must be
# registered on the index before it is (re)created.
declarations_idx.analyzer(namesAutocompleteAnalyzer)
declarations_idx.analyzer(namesAutocompleteSearchAnalyzer)


@declarations_idx.doc_type
class Declaration(DocType, AbstractDeclaration):
    """Declaration document.
    Assumes there's a dynamic mapping with all fields not indexed by default."""

    persons = Text(analyzer="ukrainian", copy_to="all")
    countries = Text(analyzer="ukrainian", copy_to="all")
    companies = Text(analyzer="ukrainian", copy_to="all")
    def handle(self, *args, **options):
        """Analyze each word from ``options["words"]`` with a German text
        analyzer on a scratch index and print the resulting tokens."""
        text_analyzer = get_text_analyzer("german")
        elastic_index = Index("mst_debug")
        if not elastic_index.exists():
            elastic_index.create()
        # Close before registering the analyzer, reopen afterwards —
        # presumably because analyzer settings can only be changed on a
        # closed index; confirm against the ES version in use.
        elastic_index.close()
        elastic_index.analyzer(text_analyzer)
        elastic_index.save()
        elastic_index.open()
        elastic_index.flush()

        for word in options["words"]:
            analysis = elastic_index.analyze(body={
                "analyzer": "text_analyzer",
                "text": word
            })
            # The analyze API returns {"tokens": [{"token": ...}, ...]}.
            tokens = [i["token"] for i in analysis["tokens"]]
            self.stdout.write("{} {}\n".format(word, tokens))
コード例 #55
0
ファイル: guesser.py プロジェクト: Pinafore/qb-api
 def exists():
     """Return whether the guesser's Elasticsearch index is present."""
     idx = Index(INDEX_NAME)
     return idx.exists()
コード例 #56
0
ファイル: guesser.py プロジェクト: Pinafore/qb-api
 def delete():
     """Delete the guesser index; a missing index is logged, not fatal."""
     idx = Index(INDEX_NAME)
     try:
         idx.delete()
     except elasticsearch.exceptions.NotFoundError:
         log.info(
             'Could not delete non-existent index, creating new index...')
コード例 #57
0
def index_doc(doc, index_name="wiki-dumps"):
    """Create the target index.

    NOTE(review): despite its name, this function only creates the index —
    *doc* is never written to Elasticsearch, and ``Index.create()`` raises
    if the index already exists; confirm intended behavior with callers.
    (Removed an unused ``global es`` declaration.)
    """
    index = Index(index_name)
    index.create()
コード例 #58
0
    def construct_query(self, system=None, file_path=None, **kwargs):
        """Build the publications search query from ``self.query_string``.

        Combines a nested users-name query, a nested NEES-PI-name query and
        a plain field query, restricted to the legacy publications index and
        excluding unpublished/saved entries.  ``system`` and ``file_path``
        are accepted but unused here.
        """
        project_query_fields = [
            "projectId", "title", "description", "doi", "publications", "pis",
            "name"
        ]
        # First alias name of each publications index.
        # NOTE(review): published_index_name is computed but never used
        # below — confirm whether the published index was meant to be
        # included in the final query.
        published_index_name = list(
            Index(settings.ES_INDEX_PREFIX.format(
                'publications')).get_alias().keys())[0]
        legacy_index_name = list(
            Index(settings.ES_INDEX_PREFIX.format(
                'publications-legacy')).get_alias().keys())[0]
        # NOTE(review): filter_queries is populated from kwargs
        # ['type_filters'] but never applied to the returned query —
        # looks like dead code or a dropped feature; verify with callers.
        filter_queries = []
        if kwargs.get('type_filters'):
            for type_filter in kwargs['type_filters']:
                if type_filter == 'nees':
                    type_query = Q({'term': {'_index': legacy_index_name}})
                else:
                    type_query = Q(
                        'term',
                        **{'project.value.projectType._exact': type_filter})
                filter_queries.append(type_query)

        # Match the query string against nested user first/last names.
        ds_user_query = Q({
            "nested": {
                "path": "users",
                "ignore_unmapped": True,
                "query": {
                    "query_string": {
                        "query":
                        self.query_string,
                        "fields": [
                            "users.first_name", "users.last_name",
                            "user.username"
                        ],
                        "lenient":
                        True
                    }
                }
            }
        })
        # Match against nested NEES principal-investigator names.
        nees_pi_query = Q({
            "nested": {
                "path": "pis",
                "ignore_unmapped": True,
                "query": {
                    "query_string": {
                        "query": self.query_string,
                        "fields": ["pis.firstName", "pis.lastName"],
                        "lenient": True
                    }
                }
            }
        })
        pub_query = Q('query_string',
                      query=self.query_string,
                      default_operator='and',
                      fields=project_query_fields)

        # NOTE(review): despite the name, this restricts results to the
        # *legacy* index only (term on legacy_index_name) — confirm this
        # is intentional.
        published_query = Q(
            'bool',
            must=[
                Q('bool', should=[ds_user_query, nees_pi_query, pub_query]),
                Q({'term': {
                    '_index': legacy_index_name
                }}),
            ],
            must_not=[
                Q('term', status='unpublished'),
                Q('term', status='saved')
            ])

        return published_query
コード例 #59
0
ファイル: views.py プロジェクト: owaisj/portal
    def get(self, request):
        """GET handler.

        Runs a combined search over public files, publications and CMS
        content, optionally restricted by the ``type_filter`` query
        parameter, and returns the hits plus per-category totals as JSON.
        """
        q = request.GET.get('query_string')
        offset = int(request.GET.get('offset', 0))
        limit = int(request.GET.get('limit', 10))
        if limit > 500:
            return HttpResponseBadRequest("limit must not exceed 500")
        type_filter = request.GET.get('type_filter', 'all')

        def _first_alias(prefix_key):
            # First alias name of the index.  dict.keys() is a view in
            # Python 3 and cannot be indexed directly (the original
            # `.keys()[0]` raised TypeError), so materialize it first.
            return list(Index(settings.ES_INDEX_PREFIX.format(
                prefix_key)).get_alias().keys())[0]

        doc_type_map = {
            _first_alias('publications'): 'publication',
            _first_alias('publications-legacy'): 'publication',
            _first_alias('files'): 'file',
            _first_alias('cms'): 'modelresult',
        }

        public_files_query = CommunityDataSearchManager(
            request).construct_query() | PublishedDataSearchManager(
                request).construct_query()
        publications_query = PublicationsSearchManager(
            request).construct_query()
        # cms_query doubles as the fallback es_query for unknown filters.
        cms_query = es_query = CMSSearchManager(request).construct_query()

        if type_filter == 'public_files':
            es_query = Search().query(public_files_query)
        elif type_filter == 'published':
            es_query = Search().query(publications_query)
        elif type_filter == 'cms':
            es_query = Search().query(cms_query).highlight(
                'body',
                fragment_size=100).highlight_options(pre_tags=["<b>"],
                                                     post_tags=["</b>"],
                                                     require_field_match=False)
        elif type_filter == 'all':
            es_query = Search().query(public_files_query | publications_query
                                      | cms_query).highlight(
                                          'body',
                                          fragment_size=100).highlight_options(
                                              pre_tags=["<b>"],
                                              post_tags=["</b>"],
                                              require_field_match=False)

        try:
            res = es_query.execute()
        except (TransportError, ConnectionTimeout) as err:
            # Retry once on transient errors; a 404 is permanent.
            if getattr(err, 'status_code', 500) == 404:
                raise
            res = es_query.execute()

        out = {}
        hits = []

        for r in res:
            d = r.to_dict()
            d["doc_type"] = doc_type_map[r.meta.index]
            if hasattr(r.meta, 'highlight'):
                d["highlight"] = r.meta.highlight.to_dict()
            if r.meta.doc_type == 'publication' and hasattr(r, 'users'):
                users = r.users
                pi = r.project.value.pi
                # filter() is a lazy iterator in Python 3 and cannot be
                # indexed (the original `filter(...)[0]` raised TypeError);
                # keep IndexError semantics for a missing PI.
                pi_user = [u for u in users if u.username == pi][0]
                d["piLabel"] = "{}, {}".format(pi_user.last_name,
                                               pi_user.first_name)
            hits.append(d)

        out['total_hits'] = res.hits.total.value
        out['hits'] = hits
        out['all_total'] = Search().query(public_files_query
                                          | publications_query
                                          | cms_query).count()
        out['public_files_total'] = Search().query(public_files_query).count()
        out['published_total'] = Search().query(publications_query).count()
        out['cms_total'] = Search().query(cms_query).count()

        return JsonResponse(out, safe=False)
コード例 #60
0
# coding:utf-8
'''
@author = super_fazai
@File    : search.py
@Time    : 2017/8/11 10:41
@connect : [email protected]
'''

from elasticsearch_dsl import (
    DocType,
    Index,
)
from scrapy import Field


class Post:
    """Minimal item-style model exposing a single ``id`` field."""

    id = Field()


# Module-level index that PostDocument is registered against below.
posts = Index('posts')


@posts.doc_type
class PostDocument(DocType):
    # Maps the ``Post`` model into the ``posts`` index.
    class Meta:
        model = Post

        # Model fields mirrored into the Elasticsearch document.
        fields = [
            'id',
        ]