Example #1
    def parallel_prep(self):
        # This is something of a hack: it "resets" the Elasticsearch
        # connections while keeping their existing settings intact.
        for label in list(connections.connections._conns.keys()):
            # Remove the existing connection (but keep its _kwargs settings).
            connections.connections._conns.pop(label, None)
            # Recreate the connection using the retained _kwargs (see the source).
            connections.get_connection(label)
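For context, a brief sketch of the registry behaviour this relies on, assuming the elasticsearch-dsl connections API: configured keyword arguments are kept separately from the live clients, so a popped connection is lazily rebuilt from them on the next get_connection call.

# Illustrative sketch, not part of the original example:
from elasticsearch_dsl import connections

# configure() only stores the kwargs; the client itself is created lazily.
connections.configure(default={'hosts': ['localhost:9200']})
conn = connections.get_connection('default')  # client built from the stored kwargs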
Example #2
async def test_get_connection():
    conn = create_connection()
    assert conn is get_connection()
    assert conn is get_connection('async')
    assert conn is connections.get_connection('async')

    # Each failing call needs its own raises() block; otherwise the second
    # call would never run once the first one raises.
    with raises(KeyError):
        get_connection('default')
    with raises(KeyError):
        connections.get_connection()
Example #3
def new_index(
    index_base_name: str,
    document_cls: Type[_T_BaseDocument],
    *,
    move_data: bool = False,
    update_alias: bool = True,
) -> str:
    """Creates a new Index with mapping settings from given class.

    The index is versioned by including the current timestamp in its name. This way,
    existing indices with potentially incompatible mappings are not affected. An alias
    is then pointed at the newest index.

    Implements the alias migration pattern, based on:
    https://github.com/elastic/elasticsearch-dsl-py/blob/9b1a39dd47e8678bc4885b03b138293e189471d0/examples/alias_migration.py

    :param index_base_name: The index to create a new version of.
    :param document_cls: The elasticsearch-dsl-based class that defines the mapping.
    :param move_data: If true, reindex all data from the previous index to the new one
          (before updating the alias).
    :param update_alias: If true, move the alias to the newly created index.
    """

    _LOGGER.debug("Creating new index '{}'.", index_base_name)

    new_index_name = index_base_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S")
    new_index = Index(new_index_name)
    new_index.settings(**document_cls.index_settings())
    # The following is equivalent to `new_index.document(document_cls)` except that it
    # does not add `new_index` as a default index to `document_cls`.
    new_index._doc_types.append(document_cls)
    new_index.create()

    if move_data:
        _LOGGER.info("Reindexing data from previous copy to newly created one...")

        # TODO: test if this works and what happens if no previous index exists.
        connections.get_connection().reindex(
            body={
                "source": {"index": index_base_name},
                "dest": {"index": new_index_name},
            },
            request_timeout=3600,
            # TODO: find out if timeout works for large index
            # TODO: check if parameter name is actually `request_timeout` and not
            #  `timeout` as indicated by source.
        )
        new_index.refresh()

    if update_alias:
        all_indices = Index(index_base_name + "-*")
        if all_indices.exists_alias(name=index_base_name):
            all_indices.delete_alias(name=index_base_name)
        new_index.put_alias(name=index_base_name)

    return new_index_name
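For illustration, a minimal usage sketch of the helper above. The document class and host are assumptions, not taken from the original project; the index_settings classmethod is included because the helper calls it.

# Hypothetical usage sketch (ArticleDocument and the host are illustrative):
from elasticsearch_dsl import Document, Text, connections

class ArticleDocument(Document):
    title = Text()

    @classmethod
    def index_settings(cls):
        # The helper above reads the index settings from the document class.
        return {'number_of_shards': 1, 'number_of_replicas': 0}

connections.create_connection(hosts=['localhost:9200'])
created_name = new_index('articles', ArticleDocument, move_data=False)
print(created_name)  # e.g. 'articles-20240101-120000'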
Example #4
def setup_elastic_connection(request):
    """Creates a connection to elasticsearch for use in tests.

    If the environment variable PYTEST_ENV is set, and a connection does
    not already exist, a new one will be created. Runs before every test.
    """
    if is_dev_env():
        return

    try:
        connections.get_connection()
    except KeyError:
        elastic_host = os.getenv("ELASTIC_HOST", "localhost")
        connections.create_connection(hosts=[elastic_host])
        initialize_models()
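Since this is meant to run before every test, it is presumably registered as an autouse fixture; a hypothetical sketch of that wiring (an assumption, the project may hook it up differently):

import pytest

@pytest.fixture(autouse=True)
def elastic_connection(request):
    # Hypothetical: ensure the Elasticsearch connection exists for every test.
    setup_elastic_connection(request)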
Example #5
def index_listing(files):
    """
    Index the result of a Tapis listing. Files are indexed with a UUID
    comprising the SHA256 hash of the system + path.

    Parameters
    ----------
    files: list
        list of Tapis files (either dict or agavepy.agave.Attrdict)

    Returns
    -------
    Void
    """
    from portal.libs.elasticsearch.docs.base import IndexedFile
    idx = IndexedFile.Index.name
    client = get_connection('default')
    ops = []
    for _file in files:
        file_dict = dict(_file)
        if file_dict['name'][0] == '.':
            continue
        file_dict['lastUpdated'] = current_time()
        file_dict['basePath'] = os.path.dirname(file_dict['path'])
        file_uuid = file_uuid_sha256(file_dict['system'], file_dict['path'])
        ops.append({
            '_index': idx,
            '_id': file_uuid,
            'doc': file_dict,
            '_op_type': 'update',
            'doc_as_upsert': True
            })

    bulk(client, ops)
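The file_uuid_sha256 helper is not shown above; a plausible sketch, assuming the ID is simply derived from the SHA-256 digest of the system and path (the real helper may differ):

import hashlib
import uuid

def file_uuid_sha256(system, path):
    # Hypothetical: deterministic ID from the SHA-256 of "<system>/<path>".
    digest = hashlib.sha256(f"{system}/{path}".encode("utf-8")).hexdigest()
    return str(uuid.UUID(digest[:32]))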
Example #6
def delete_recursive(system, path):
    """
    Recursively delete all Elasticsearch documents in a specified system/path.

    Parameters
    ----------
    system: str
        The Tapis system ID containing files to be deleted.
    path: str
        The path relative to the system root. All documents with this path as a
        prefix will be deleted.

    Returns
    -------
    Void
    """
    from portal.libs.elasticsearch.docs.base import IndexedFile
    hits = walk_children(system, path, recurse=True)
    idx = IndexedFile.Index.name
    client = get_connection('default')

    # Group children in batches of 100 for bulk deletion.
    for group in grouper(hits, 100):
        filtered_group = filter(lambda hit: hit is not None, group)
        ops = map(lambda hit: {'_index': idx,
                               '_id': hit.meta.id,
                               '_op_type': 'delete'},
                  filtered_group)
        bulk(client, ops)
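The grouper helper is not shown above; a minimal sketch following the classic itertools recipe, which pads the last batch with None (hence the filter in the loop):

from itertools import zip_longest

def grouper(iterable, n, fillvalue=None):
    # Collect items into fixed-length batches; the final batch is padded
    # with fillvalue, which the caller filters out before deleting.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)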
Example #7
    def parallel_bulk_index(serializer_hash, index, options):
        serializer = Serializer.hash_registry[serializer_hash]

        using = options.get('using') or 'default'
        client = connections.get_connection(using)

        serializer.bulk_operation(index=index, client=client, **options)
Example #8
    def setUp(self):
        self.index = f'test-wine-{uuid.uuid4()}'
        self.connection = connections.get_connection()
        self.connection.indices.create(index=self.index,
                                       body={
                                           'settings': {
                                               'number_of_shards': 1,
                                               'number_of_replicas': 0,
                                           },
                                           'mappings': ES_MAPPING,
                                       })

        # Load fixture data
        fixture_path = pathlib.Path(settings.BASE_DIR / 'catalog' /
                                    'fixtures' / 'test_wines.json')
        with open(fixture_path, 'rt') as fixture_file:
            fixture_data = json.loads(fixture_file.read())
            for wine in fixture_data:
                fields = wine['fields']
                self.connection.create(index=self.index,
                                       id=fields['id'],
                                       body={
                                           'country': fields['country'],
                                           'description':
                                           fields['description'],
                                           'points': fields['points'],
                                           'price': fields['price'],
                                           'variety': fields['variety'],
                                           'winery': fields['winery'],
                                       },
                                       refresh=True)

        # Start patching
        self.mock_constants = patch('catalog.views.constants').start()
        self.mock_constants.ES_INDEX = self.index
Example #9
    def get_score_for_ifra(self):
        from app import connections
        es = connections.get_connection()
        latlon = str(self.location).split(",")
        query = {
            "query": {
                "bool": {
                    "must": {
                        "match_all": {}
                    },
                    "filter": {
                        "geo_distance": {
                            "distance": "10km",
                            "location": {
                                "lat": latlon[0],
                                "lon": latlon[1]
                            }
                        }
                    }
                }
            }
        }
        res = es.search(index='prolepsyspoi', body=query)
        new_score = 0.0
        if 'hits' in res and 'hits' in res['hits'] and len(res['hits']['hits']) > 0:
            for i in res['hits']['hits']:
                new_score += float(i['_source'].get('score', 0))
            new_score = new_score / len(res['hits']['hits'])
        return new_score
Example #10
    def telephones(cls, lat, lon):
        from app import connections
        es = connections.get_connection()

        query = {
            "query": {
                "bool": {
                    "must": {
                        "match_all": {}
                    },
                    "filter": {
                        "geo_distance": {
                            "distance": "10km",
                            "location": {
                                "lat": lat,
                                "lon": lon
                            }
                        }
                    }
                }
            }
        }
        res = es.search(index='users', body=query)
        telephones = []
        if 'hits' in res and 'hits' in res['hits'] and len(res['hits']['hits']) > 0:
            for i in res['hits']['hits']:
                if 'telephone' in i['_source']:
                    telephones.append(i['_source']['telephone'])

        return telephones
Example #11
def installPipelines():
    conn = get_connection()
    client = IngestClient(conn)
    client.put_pipeline(id='ingest_attachment', body={
        'description': "Extract attachment information",
        'processors': [
            {
                "attachment": {
                    "field": "data"
                },
                "remove": {
                    "field": "data"
                }
            }
        ]
    })
    client.put_pipeline(id='add_timestamp', body={
        'description': "Adds an index_date timestamp",
        'processors': [
            {
                "set": {
                    "field": "index_date",
                    "value": "{{_ingest.timestamp}}",
                },
            },
        ]
    })
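Once installed, an ingest pipeline is applied by naming it when indexing a document; a short hypothetical usage sketch (index name and payload are illustrative):

es = get_connection()
es.index(
    index='documents',
    pipeline='ingest_attachment',                 # extract text from the base64 "data" field
    body={'data': 'UGxhaW4gdGV4dCBjb250ZW50'},    # base64-encoded file content
)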
Example #12
def create_patterned_index(alias: str,
                           pattern: str,
                           create_alias: bool = True) -> None:
    """Run only one time to setup"""
    name = pattern.replace('*', datetime.datetime.now().strftime('%Y%m%d%H%M'))
    # create_index
    es = connections.get_connection()
    es.indices.create(index=name)
    if create_alias:
        es.indices.update_aliases(
            body={
                'actions': [
                    {
                        "remove": {
                            "alias": alias,
                            "index": pattern
                        }
                    },
                    {
                        "add": {
                            "alias": alias,
                            "index": name
                        }
                    },
                ]
            })
Example #13
    def sanity_check_new_index(self, attempt, document, new_index_name, previous_record_count):
        """ Ensure that we do not point to an index that looks like it has missing data. """
        current_record_count = self.get_record_count(document)
        percentage_change = self.percentage_change(current_record_count, previous_record_count)

        # Verify there was not a big shift in record count
        record_count_is_sane = percentage_change < settings.INDEX_SIZE_CHANGE_THRESHOLD

        # Spot check a known-flaky field type to detect VAN-391
        aggregation_type = Mapping.from_es(new_index_name)['aggregation_key'].name
        record_count_is_sane = record_count_is_sane and aggregation_type == 'keyword'

        if not record_count_is_sane:
            conn = get_connection()
            alternate_current_record_count = conn.search({"query": {"match_all": {}}}, index=new_index_name).get(
                'hits', {}).get('total', {}).get('value', 0)
            message = '''
        Sanity check failed for attempt #{0}.
        Index name: {1}
        Percentage change: {2}
        Previous record count: {3}
        Base record count: {4}
        Search record count: {5}
        Aggregation key type: {6}
                '''.format(
                attempt,
                new_index_name,
                str(int(round(percentage_change * 100, 0))) + '%',
                previous_record_count,
                current_record_count,
                alternate_current_record_count,
                aggregation_type,
            )
            logger.info(message)
            logger.info('...sleeping for 5 seconds...')
            time.sleep(5)
        else:
            message = '''
        Sanity check passed for attempt #{0}.
        Index name: {1}
        Percentage change: {2}
        Previous record count: {3}
        Current record count: {4}
                '''.format(
                attempt,
                new_index_name,
                str(int(round(percentage_change * 100, 0))) + '%',
                previous_record_count,
                current_record_count
            )
            logger.info(message)

        index_info_string = (
            'The previous index contained [{}] records. '
            'The new index contains [{}] records, a [{:.2f}%] change.'.format(
                previous_record_count, current_record_count, percentage_change * 100
            )
        )

        return record_count_is_sane, index_info_string
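percentage_change and get_record_count are not shown above; a plausible sketch of the percentage-change helper used for the threshold comparison (an assumption, the real implementation may differ):

    def percentage_change(self, current, previous):
        # Hypothetical: fractional change relative to the previous record count,
        # guarding against a zero previous count (e.g. the first index build).
        if previous == 0:
            return 0.0 if current == 0 else 1.0
        return abs(current - previous) / previous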
Example #14
    def handle(self, *args, **options):
        LogEntry.init()
        count = LogEntry_db.objects.count()
        step = 10000
        for i in range(0, count, step):
            entries = []
            # Slice one batch at a time; the upper bound is i + step, not a
            # running counter, so each batch contains at most `step` entries.
            for entry_db in LogEntry_db.objects.all()[i:i + step]:
                entry = LogEntry(
                    meta={'id': entry_db.pk},
                    action=['create', 'update', 'delete'][entry_db.action],
                    content_type_id=entry_db.content_type.pk,
                    content_type_app_label=entry_db.content_type.app_label,
                    content_type_model=entry_db.content_type.model,
                    object_id=entry_db.object_id,
                    object_pk=entry_db.object_pk,
                    object_repr=entry_db.object_repr,
                    timestamp=entry_db.timestamp
                )
                if entry_db.actor:
                    entry.actor_id = str(entry_db.actor.pk)
                    entry.actor_email = entry_db.actor.email
                    entry.actor_first_name = entry_db.actor.first_name
                    entry.actor_last_name = entry_db.actor.last_name
                if entry_db.remote_addr:
                    entry.remote_addr = entry_db.remote_addr
                if entry_db.changes:
                    entry.changes = [
                        Change(field=key, old=val[0], new=val[1]) for key, val in entry_db.changes.items()
                    ]
                entries.append(entry)

            LogEntry.bulk(connections.get_connection(), entries)
            print(f'Uploaded {i + len(entries)} logs')
Example #15
def setup_index(doctype):
    """
    Create the index template in elasticsearch specifying the mappings and any
    settings to be used. This can be run at any time, ideally at every new code
    deploy.
    """

    alias = doctype._index._name
    pattern = '{alias}-*'.format(alias=alias)

    # create an index template
    index_template = doctype._index.as_template(alias, pattern)
    # upload the template into elasticsearch
    # potentially overriding the one already there
    index_template.save()

    # get the low level connection
    es = get_connection()
    # create the first index if it doesn't exist
    if not es.indices.exists_alias(alias):
        index = get_next_index(pattern)
        es.indices.create(index=index)
        es.indices.update_aliases(
            body={'actions': [
                {
                    "add": {
                        "alias": alias,
                        "index": index
                    }
                },
            ]})
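get_next_index is not shown above; a minimal sketch consistent with the timestamped index names used throughout these examples (an assumption about its exact format):

from datetime import datetime

def get_next_index(pattern):
    # Replace the wildcard in e.g. "myalias-*" with a timestamp,
    # yielding a concrete, sortable index name.
    return pattern.replace('*', datetime.now().strftime('%Y%m%d%H%M%S%f'))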
Example #16
File: index.py  Project: cceh/kosh
    def delete(cls, elex: Dict[str, Any]) -> None:
        '''
        todo: docs
        '''
        idxs = connections.get_connection().indices
        logger().debug('Dropping index %s', elex.uid)
        idxs.delete(ignore=404, index=elex.uid)
Example #17
    async def execute(self, ignore_cache=False, raise_on_error=True):
        """
        Execute the multi search request and return a list of search results.
        """
        if ignore_cache or not hasattr(self, '_response'):
            es = get_connection(self._using)

            responses = await es.msearch(index=self._index,
                                         body=self.to_dict(),
                                         **self._params)

            out = []
            for s, r in zip(self._searches, responses['responses']):
                if r.get('error', False):
                    if raise_on_error:
                        raise TransportError('N/A', r['error']['type'],
                                             r['error'])
                    r = None
                else:
                    r = Response(s, r)
                out.append(r)

            self._response = out

        return self._response
Example #18
    def update_search(self, data):
        doc = self.to_search_data()

        if 'parent' not in data:
            try:
                current_structure = self.tag.get_active_structure()
                parent = current_structure.parent
                if parent is not None:
                    parent = {
                        'id': str(parent.tag.current_version.pk),
                        'index': parent.tag.current_version.elastic_index,
                    }
            except TagStructure.DoesNotExist:
                parent = None

            data['parent'] = parent

        doc.update(data)
        doc['current_version'] = self.tag.current_version == self

        doc.pop('_id', None)
        doc.pop('_index', None)
        doc = {'doc_as_upsert': True, 'doc': doc}
        es = get_connection()
        es.update(self.elastic_index, 'doc', str(self.pk), body=doc)
Example #19
File: index.py  Project: cceh/kosh
    def create(cls, elex: Dict[str, Any]) -> None:
        '''
        todo: docs
        '''
        idxs = connections.get_connection().indices
        logger().debug('Creating index %s', elex.uid)
        idxs.create(index=elex.uid, body=cls.__schema(elex))
Example #20
    def _rebuild(self, models, options):
        if options['atomic'] is False and not self._delete(models, options):
            return

        if options['atomic'] is True:
            alias_index_pairs = []
            index_suffix = "-" + datetime.now().strftime("%Y%m%d%H%M%S%f")
            for index in registry.get_indices(models):
                # The alias takes the original index name value. The
                # index name sent to Elasticsearch will be the alias
                # plus the suffix from above.
                new_index = index._name + index_suffix
                alias_index_pairs.append({
                    'alias': index._name,
                    'index': new_index
                })
                index._name = new_index

        self._create(models, options)
        self._populate(models, options)

        if options['atomic'] is True:
            es_conn = connections.get_connection()
            existing_aliases = []
            for index in es_conn.indices.get_alias().values():
                existing_aliases += index['aliases'].keys()

            for alias_index_pair in alias_index_pairs:
                alias = alias_index_pair['alias']
                alias_exists = alias in existing_aliases
                self._update_alias(es_conn, alias, alias_index_pair['index'],
                                   alias_exists, options)
Example #21
File: elastic.py  Project: insanity13/faces
    def update_faces_index(self):
        q = {
            "script": {
                "inline": "ctx._source.person=params.person",
                "lang": "painless",
                "params": {
                    "person": self.person
                }
            },
            "query": {
                "bool": {
                    "must": [{
                        "terms": {
                            "_id": self.faces
                        }
                    }, {
                        "bool": {
                            "must_not": [{
                                "exists": {
                                    "field": "person"
                                }
                            }]
                        }
                    }]
                }
            }
        }
        es = connections.get_connection()
        es.update_by_query(body=q,
                           doc_type='doc',
                           index='faces',
                           conflicts='proceed')
Example #22
File: eisp.py  Project: cceh/eisp
    def main(self) -> None:
        try:
            instance.config = ConfigParser()
            instance.config.read_dict(defaultconfig())
            logger().info('Started eisp with pid %s', getpid())

            for i in [i for i in argv if i.startswith('--')]:
                try:
                    mod('eisp.param.{}'.format(i[2:])).__dict__[i[2:]](argv)
                except:
                    exit('Invalid parameter or argument to {}'.format(i[2:]))

            conf = dotdict(instance.config['data'])
            connections.create_connection(hosts=[conf.host])
            delete_index(conf.index_name)
            create_index(conf.elastic_mapping, conf.index_name)

            for ok, info in helpers.parallel_bulk(connections.get_connection(),
                                                  actions=index_pdfs(
                                                      conf.index_name,
                                                      conf.root),
                                                  request_timeout=60,
                                                  chunk_size=100,
                                                  thread_count=8,
                                                  queue_size=8):
                if not ok:
                    print(info)

        except KeyboardInterrupt:
            print('\N{bomb}')
        except Exception as exception:
            logger().exception(exception)
        except SystemExit as exception:
            logger().critical(str(exception))
Example #23
    def handle(self, *args, **kwargs):
        connection = connections.get_connection()

        self.stdout.write(f'Checking if index "{ES_INDEX}" exists...')
        if connection.indices.exists(index=ES_INDEX):
            self.stdout.write(f'Index "{ES_INDEX}" already exists')
            self.stdout.write(f'Updating mapping on "{ES_INDEX}" index...')
            connection.indices.put_mapping(index=ES_INDEX, body=ES_MAPPING)
            self.stdout.write(f'Updated mapping on "{ES_INDEX}" successfully')
        else:
            self.stdout.write(f'Index "{ES_INDEX}" does not exist')
            self.stdout.write(f'Creating index "{ES_INDEX}"...')
            connection.indices.create(index=ES_INDEX,
                                      body={
                                          'settings': {
                                              'number_of_shards': 1,
                                              'number_of_replicas': 0,
                                          },
                                          'mappings': ES_MAPPING,
                                      })
            self.stdout.write(f'Index "{ES_INDEX}" created successfully')

        self.stdout.write(f'Bulk updating documents on "{ES_INDEX}" index...')
        succeeded, _ = bulk(connection,
                            actions=self._document_generator(),
                            stats_only=True)
        self.stdout.write(
            f'Updated {succeeded} documents on "{ES_INDEX}" successfully')
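The _document_generator helper is not shown; a hypothetical sketch of what it might yield, following the update-with-upsert action shape used in the other bulk examples here (model and field names are illustrative):

    def _document_generator(self):
        # Hypothetical: one upsert action per database row.
        for wine in Wine.objects.iterator():
            yield {
                '_index': ES_INDEX,
                '_id': wine.id,
                '_op_type': 'update',
                'doc': {'country': wine.country, 'points': wine.points},
                'doc_as_upsert': True,
            }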
Example #24
def _with_elastic(do: str, action: Callable[[Elasticsearch], None]) -> bool:
    try:
        action(get_connection())
        return True
    except Exception as e:
        LOG.warning('Could not %s elastic. Perhaps client is down?', do)
        return debug_ex(e, f'{do} elastic', LOG, silent=True)
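A hypothetical usage of the wrapper above, showing the intended pattern of passing a callable that receives the low-level client:

# Hypothetical usage: ping the cluster, logging a warning instead of raising
# when Elasticsearch is unreachable.
ok = _with_elastic('ping', lambda es: es.ping())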
Example #25
    def run(self, corpus, index_name="fact_corpus", document_class=Fact, **kwargs):
        connections.create_connection(hosts=["localhost"])
        document_class.init()

        documents = (
            document_class(meta={"id": id}, fact=doc["fact"]).to_dict(True)
            for id, doc in corpus.items()
        )

        logger.info(f"Building corpus index for {index_name}")

        # RayExecutor().run(documents, self.save_data, {})

        for success, info in tqdm(
            parallel_bulk(
                connections.get_connection(),
                documents,
                thread_count=kwargs.pop("batch_size", multiprocessing.cpu_count()),
                chunk_size=100000,
                max_chunk_bytes=2 * 1024 ** 3,
            )
        ):
            if not success:
                logger.error(f"A document failed: {info} ")

        logger.success("Elastic index successfully built")

        return index_name
Example #26
    def import_content(self, task, path, rootdir=None, ip=None):
        if not rootdir:
            rootdir = os.path.dirname(path)

        self.indexed_files = []
        self.task = task

        archive = self.get_archive(path)
        if not archive:
            archive = getattr(ip, 'tag', None)
            if not archive:
                raise ValueError('No archive found')
        else:
            archive = archive.tag.current_version

        logger.debug("Deleting task tags already in database...")
        Tag.objects.filter(task=self.task).delete()

        logger.debug("Deleting task tags already in Elasticsearch...")
        indices_to_delete = [doc._index._name for doc in [Archive, Component, File]]

        es = get_connection()
        Search(using=es, index=indices_to_delete).query('term', task_id=str(self.task.pk)).delete()

        tags, tag_versions, tag_structures, components = self.parse_eard(path, ip, rootdir, archive)
        self.update_progress(50)

        self.save_to_database(tags, tag_versions, tag_structures, archive)
        self.update_progress(75)

        self.save_to_elasticsearch(components)
        self.update_progress(100)

        return self.indexed_files
Example #27
def bulk_index_public():
    public_publications = Publication.api.primary().filter(status='REVIEWED')
    PublicationDoc.init()
    AuthorDoc.init()
    PlatformDoc.init()
    SponsorDoc.init()
    TagDoc.init()
    logger.info('creating publication index')
    client = connections.get_connection()
    bulk(client=client,
         actions=(AuthorDoc.from_instance(a) for a in Author.objects.filter(
             publications__in=public_publications)))
    bulk(client=client,
         actions=(PlatformDoc.from_instance(p)
                  for p in Platform.objects.filter(
                      publications__in=public_publications)))
    bulk(client=client,
         actions=(SponsorDoc.from_instance(s) for s in Sponsor.objects.filter(
             publications__in=public_publications)))
    bulk(client=client,
         actions=(TagDoc.from_instance(t) for t in Tag.objects.filter(
             publications__in=public_publications)))
    bulk(client=client,
         actions=(PublicationDoc.from_instance(p) for p in public_publications.select_related('container') \
         .prefetch_related('tags', 'sponsors', 'platforms', 'creators', 'model_documentation').iterator()))
Example #28
def installPipelines():
    conn = get_connection()
    client = IngestClient(conn)
    client.put_pipeline(id='ingest_attachment',
                        body={
                            'description':
                            "Extract attachment information",
                            'processors': [{
                                "attachment": {
                                    "field": "data",
                                    "indexed_chars": "-1"
                                },
                                "remove": {
                                    "field": "data"
                                }
                            }]
                        })
    client.put_pipeline(id='add_timestamp',
                        body={
                            'description':
                            "Adds an index_date timestamp",
                            'processors': [
                                {
                                    "set": {
                                        "field": "index_date",
                                        "value": "{{_ingest.timestamp}}",
                                    },
                                },
                            ]
                        })
Example #29
File: tests.py  Project: rajivraj/vmc
    def generate_vulns(self):
        docs = []

        for i in range(100):
            docs.append(
                create_vulnerability(create_asset(F'10.10.10.{i}', save=False),
                                     self.cve,
                                     save=False).to_dict())

        for i in range(100):
            vuln = create_vulnerability(create_asset(F'10.10.10.{i}',
                                                     save=False),
                                        self.cve,
                                        save=False)
            vuln.tags.append(VulnerabilityStatus.FIXED)
            docs.append(vuln.to_dict())

        for i in range(100):
            asset = create_asset(F'10.10.10.{i}', save=False)
            asset.tags = [AssetStatus.DELETED]
            vuln = create_vulnerability(asset, self.cve, save=False)
            docs.append(vuln.to_dict())

        bulk(get_connection(),
             docs,
             refresh=True,
             index=VulnerabilityDocument.Index.name)
Example #30
File: documents.py  Project: rpkilby/yurika
    def bulk_create(cls, docs, using=None, index=None, handler=bulk, **kwargs):
        if index is not None:
            for doc in docs:
                doc._index = doc._index.clone(name=index)

        client = connections.get_connection(using or cls._index._using)
        docs = [doc.to_dict(include_meta=True) for doc in docs]
        return handler(client, docs, **kwargs)
Example #31
    def put(self, request, *args, **kwargs):
        connection = get_connection()
        url_path = kwargs['url_path']
        body = json.loads(request.body)
        return JsonResponse(
            connection.transport.perform_request('PUT',
                                                 f'/{url_path}',
                                                 body=body))
Example #32
    def bulk(self, documents):
        """Takes a documents generator, converts to ES format and passes generator on to ES bulk call"""

        conn = connections.get_connection()

        for result in elasticsearch.helpers.streaming_bulk(
            client=conn,
            actions=self._bulk_generator(documents),
            chunk_size=self._bulk_chunk_size,
        ):
            pass
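The _bulk_generator helper is not shown; a hypothetical sketch of the conversion step it performs (the index name attribute is an assumption):

    def _bulk_generator(self, documents):
        # Hypothetical: wrap each document in an index action for the target
        # index; the real converter may set IDs, routing, or op types.
        for doc in documents:
            yield {'_index': self._index_name, '_source': doc}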
Example #33
def bulk_index_public():
    client = connections.get_connection()
    for doc_class in (PublicationDoc, AuthorDoc, PlatformDoc, SponsorDoc, TagDoc):
        client.indices.delete(index=doc_class.Index.name, ignore=[400, 404])
        doc_class.init()
    public_publications = Publication.api.primary().filter(status='REVIEWED')
    bulk(client=client,
         actions=(AuthorDoc.from_instance(a) for a in Author.objects.filter(publications__in=public_publications)))
    bulk(client=client,
         actions=(PlatformDoc.from_instance(p) for p in Platform.objects.filter(publications__in=public_publications)))
    bulk(client=client,
         actions=(SponsorDoc.from_instance(s) for s in Sponsor.objects.filter(publications__in=public_publications)))
    bulk(client=client,
         actions=(TagDoc.from_instance(t) for t in Tag.objects.filter(publications__in=public_publications)))
    bulk(client=client,
         actions=(PublicationDoc.from_instance(p) for p in public_publications.select_related('container') \
         .prefetch_related('code_archive_urls', 'tags', 'sponsors', 'platforms', 'creators', 'model_documentation').iterator()))
Example #34
def migrate(move_data=True, update_alias=True):
    """
    Upgrade function that creates a new index for the data. Optionally it also can
    (and by default will) reindex previous copy of the data into the new index
    (specify ``move_data=False`` to skip this step) and update the alias to
    point to the latest index (set ``update_alias=False`` to skip).

    Note that while this function is running the application can still perform
    any and all searches without any loss of functionality. It should, however,
    not perform any writes at this time as those might be lost.
    """
    # construct a new index name by appending current timestamp
    next_index = PATTERN.replace('*', datetime.now().strftime('%Y%m%d%H%M%S%f'))

    # get the low level connection
    es = connections.get_connection()

    # create new index, it will use the settings from the template
    es.indices.create(index=next_index)

    if move_data:
        # move data from current alias to the new index
        es.reindex(
            body={"source": {"index": ALIAS}, "dest": {"index": next_index}},
            request_timeout=3600
        )
        # refresh the index to make the changes visible
        es.indices.refresh(index=next_index)

    if update_alias:
        # repoint the alias to point to the newly created index
        es.indices.update_aliases(body={
            'actions': [
                {"remove": {"alias": ALIAS, "index": PATTERN}},
                {"add": {"alias": ALIAS, "index": next_index}},
            ]
        })
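A minimal sketch of how such a migration is typically driven, assuming a setup() step (as in the upstream alias-migration example) that installs the index template and creates the first index:

if __name__ == '__main__':
    # Hypothetical driver: connect, ensure the template and alias exist, then migrate.
    connections.create_connection(hosts=['localhost:9200'])
    setup()    # assumed helper that saves the template and creates the first index
    migrate(move_data=True, update_alias=True)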
Example #35
    def __init__(self, *args, **kwargs):
        self.client = get_connection()
        super().__init__(*args, **kwargs)