def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', default=os.environ.get('ES_HOST', None), help='Elasticsearch host')
    parser.add_argument('--port', default=os.environ.get('ES_PORT', None), type=int, help='Elasticsearch port')
    parser.add_argument('--username', default=os.environ.get('ES_USERNAME', None), help='Elasticsearch username')
    parser.add_argument('--password', default=os.environ.get('ES_PASSWORD', None), help='Elasticsearch password')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=env('ES_USE_SSL', None), help='Use TLS')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use TLS')
    parser.add_argument('--verify-certs', action='store_true', default=None, help='Verify TLS certificates')
    parser.add_argument('--no-verify-certs', dest='verify_certs', action='store_false', help='Do not verify TLS certificates')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    parser.add_argument('--send_get_body_as', default='GET', help='Method for querying Elasticsearch - POST, GET or source')
    parser.add_argument(
        '--boto-profile',
        default=None,
        dest='profile',
        help='DEPRECATED: (use --profile) Boto profile to use for signing requests')
    parser.add_argument(
        '--profile',
        default=None,
        help='AWS profile to use for signing requests. Optionally use the AWS_DEFAULT_PROFILE environment variable')
    parser.add_argument(
        '--aws-region',
        default=None,
        help='AWS Region to use for signing requests. Optionally use the AWS_DEFAULT_REGION environment variable')
    parser.add_argument('--timeout', default=60, help='Elasticsearch request timeout')
    parser.add_argument('--config', default='config.yaml', help='Global config file (default: config.yaml)')
    parser.add_argument('--recreate', type=bool, default=False,
                        help='Force re-creation of the index (this will cause data loss).')
    args = parser.parse_args()

    if os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    elif os.path.isfile(args.config):
        filename = args.config
    else:
        filename = ''

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = args.host if args.host else data.get('es_host')
        port = args.port if args.port else data.get('es_port')
        username = args.username if args.username else data.get('es_username')
        password = args.password if args.password else data.get('es_password')
        url_prefix = args.url_prefix if args.url_prefix is not None else data.get('es_url_prefix', '')
        use_ssl = args.ssl if args.ssl is not None else data.get('use_ssl')
        verify_certs = args.verify_certs if args.verify_certs is not None else data.get('verify_certs') is not False
        aws_region = data.get('aws_region', None)
        send_get_body_as = data.get('send_get_body_as', 'GET')
        ca_certs = data.get('ca_certs')
        client_cert = data.get('client_cert')
        client_key = data.get('client_key')
        index = args.index if args.index is not None else data.get('writeback_index')
        old_index = args.old_index if args.old_index is not None else None
    else:
        username = args.username if args.username else None
        password = args.password if args.password else None
        aws_region = args.aws_region
        host = args.host if args.host else raw_input('Enter Elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter Elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if use_ssl:
            verify_certs = (args.verify_certs if args.verify_certs is not None
                            else raw_input('Verify TLS certificates? t/f: ').lower() not in ('f', 'false'))
        else:
            verify_certs = True
        if args.no_auth is None and username is None:
            username = raw_input('Enter optional basic-auth username (or leave blank): ')
            password = getpass.getpass('Enter optional basic-auth password (or leave blank): ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix (prepends a string to the URL of every request): '))
        send_get_body_as = args.send_get_body_as
        ca_certs = None
        client_cert = None
        client_key = None
        index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
        if not index:
            index = 'elastalert_status'
        old_index = (args.old_index if args.old_index is not None
                     else raw_input('Name of existing index to copy? (Default None) '))

    timeout = args.timeout

    auth = Auth()
    http_auth = auth(host=host,
                     username=username,
                     password=password,
                     aws_region=aws_region,
                     profile_name=args.profile)

    es = Elasticsearch(
        host=host,
        port=port,
        timeout=timeout,
        use_ssl=use_ssl,
        verify_certs=verify_certs,
        connection_class=RequestsHttpConnection,
        http_auth=http_auth,
        url_prefix=url_prefix,
        send_get_body_as=send_get_body_as,
        client_cert=client_cert,
        ca_certs=ca_certs,
        client_key=client_key)

    esversion = es.info()["version"]["number"]
    print("Elastic Version:" + esversion.split(".")[0])
    elasticversion = int(esversion.split(".")[0])

    if elasticversion > 5:
        mapping = {'type': 'keyword'}
    else:
        mapping = {'index': 'not_analyzed', 'type': 'string'}
    print("Mapping used for string:" + str(mapping))

    silence_mapping = {
        'silence': {
            'properties': {
                'rule_name': mapping,
                'until': {'type': 'date', 'format': 'dateOptionalTime'},
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
            },
        },
    }
    ess_mapping = {
        'elastalert_status': {
            'properties': {
                'rule_name': mapping,
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
            },
        },
    }
    es_mapping = {
        'elastalert': {
            'properties': {
                'rule_name': mapping,
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
                'alert_time': {'type': 'date', 'format': 'dateOptionalTime'},
                'match_time': {'type': 'date', 'format': 'dateOptionalTime'},
                'match_body': {'type': 'object', 'enabled': False},
                'aggregate_id': mapping,
            },
        },
    }
    past_mapping = {
        'past_elastalert': {
            'properties': {
                'rule_name': mapping,
                'match_body': {'type': 'object', 'enabled': False},
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
                'aggregate_id': mapping,
            },
        },
    }
    error_mapping = {
        'elastalert_error': {
            'properties': {
                'data': {'type': 'object', 'enabled': False},
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
            },
        },
    }

    es_index = IndicesClient(es)
    if not args.recreate:
        if es_index.exists(index):
            print('Index ' + index + ' already exists. Skipping index creation.')
            return None

    # (Re-)Create indices.
    if elasticversion > 5:
        index_names = (
            index,
            index + '_status',
            index + '_silence',
            index + '_error',
            index + '_past',
        )
    else:
        index_names = (
            index,
        )
    for index_name in index_names:
        if es_index.exists(index_name):
            print('Deleting index ' + index_name + '.')
            try:
                es_index.delete(index_name)
            except NotFoundError:
                # Why does this ever occur?? It shouldn't. But it does.
                pass
        es_index.create(index_name)

    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)

    if elasticversion > 5:
        es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
        es.indices.put_mapping(index=index + '_status', doc_type='elastalert_status', body=ess_mapping)
        es.indices.put_mapping(index=index + '_silence', doc_type='silence', body=silence_mapping)
        es.indices.put_mapping(index=index + '_error', doc_type='elastalert_error', body=error_mapping)
        es.indices.put_mapping(index=index + '_past', doc_type='past_elastalert', body=past_mapping)
        print('New index %s created' % index)
    else:
        es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
        es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
        es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
        es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
        es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
        print('New index %s created' % index)

    if old_index:
        print("Copying all data from old index '{0}' to new index '{1}'".format(old_index, index))
        # Use the defaults for chunk_size, scroll, scan_kwargs, and bulk_kwargs
        elasticsearch.helpers.reindex(es, old_index, index)

    print('Done!')
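

# The time.sleep(2) above only papers over the race between index creation and
# the subsequent put_mapping calls. Below is a minimal sketch of the "real
# check" the TODO asks for, using the cluster-health wait that elasticsearch-py
# exposes; the helper name and the 30-second timeout are illustrative and not
# part of the original script.
def wait_for_indices(es, index_names, timeout_seconds=30):
    """Block until every freshly created index reports at least yellow health."""
    for index_name in index_names:
        # wait_for_status makes the request block server-side until the index
        # reaches the requested health, or the request times out.
        es.cluster.health(index=index_name,
                          wait_for_status='yellow',
                          request_timeout=timeout_seconds)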
class ELmonocleDB:
    log = logging.getLogger("monocle.ELmonocleDB")

    def __init__(
        self,
        elastic_conn="localhost:9200",
        index=None,
        timeout=10,
        prefix=CHANGE_PREFIX,
        create=True,
        previous_schema=False,
        idents_config: Optional[IdentsConfig] = None,
        user=None,
        password=None,
        use_ssl=None,
        verify_certs=None,
        ssl_show_warn=None,
    ) -> None:
        host, port = elastic_conn.split(":")
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ip = socket.gethostbyname(host)
        self.log.info("ES IP is %s" % ip)
        self.log.info("ES prefix is %s" % prefix)

        elastic_conn = [
            {
                "host": host,
                "port": port,
            }
        ]

        if use_ssl:
            elastic_conn[0]["use_ssl"] = use_ssl

        if not verify_certs:
            elastic_conn[0]["verify_certs"] = verify_certs

        if not ssl_show_warn:
            elastic_conn[0]["ssl_show_warn"] = ssl_show_warn

        if user and password:
            elastic_conn[0]["http_auth"] = "%s:%s" % (user, password)

        while True:
            try:
                s.connect((ip, int(port)))
                s.shutdown(2)
                s.close()
                break
            except Exception as excpt:
                self.log.info(
                    "Unable to connect to %s: %s. Sleeping for %ds."
                    % (elastic_conn, excpt, timeout)
                )
                time.sleep(timeout)

        self.log.info("Connecting to ES server at %s" % elastic_conn)
        self.es = Elasticsearch(elastic_conn)
        self.log.info(self.es.info())
        if previous_schema:
            self.prefix = PREV_CHANGE_PREFIX
        else:
            self.prefix = prefix

        if not index:
            self.log.info("No index provided")
            return

        self.idents_config = idents_config or []

        self.index = "{}{}".format(self.prefix, index)
        self.log.info("Using ES index %s" % self.index)
        self.mapping = {
            "properties": {
                "id": {"type": "keyword"},
                "type": {"type": "keyword"},
                "number": {"type": "keyword"},
                "change_id": {"type": "keyword"},
                "title": {
                    "type": "text",
                    "fields": {"keyword": {"type": "keyword", "ignore_above": 8191}},
                },
                "text": {
                    "type": "text",
                    "fields": {"keyword": {"type": "keyword", "ignore_above": 8191}},
                },
                "url": {"type": "keyword"},
                "commit_count": {"type": "integer"},
                "additions": {"type": "integer"},
                "deletions": {"type": "integer"},
                "changed_files_count": {"type": "integer"},
                "changed_files": {
                    "properties": {
                        "additions": {"type": "integer"},
                        "deletions": {"type": "integer"},
                        "path": {"type": "keyword"},
                    }
                },
                "commits": {
                    "properties": {
                        "sha": {"type": "keyword"},
                        "author": {
                            "properties": {
                                "uid": {"type": "keyword"},
                                "muid": {"type": "keyword"},
                            }
                        },
                        "committer": {
                            "properties": {
                                "uid": {"type": "keyword"},
                                "muid": {"type": "keyword"},
                            }
                        },
                        "authored_at": {"type": "date", "format": "date_time_no_millis"},
                        "committed_at": {"type": "date", "format": "date_time_no_millis"},
                        "additions": {"type": "integer"},
                        "deletions": {"type": "integer"},
                        "title": {"type": "text"},
                    }
                },
                "repository_prefix": {"type": "keyword"},
                "repository_fullname": {"type": "keyword"},
                "repository_shortname": {"type": "keyword"},
                "author": {
                    "properties": {
                        "uid": {"type": "keyword"},
                        "muid": {"type": "keyword"},
                    }
                },
                "on_author": {
                    "properties": {
                        "uid": {"type": "keyword"},
                        "muid": {"type": "keyword"},
                    }
                },
                "committer": {
                    "properties": {
                        "uid": {"type": "keyword"},
                        "muid": {"type": "keyword"},
                    }
                },
                "merged_by": {
                    "properties": {
                        "uid": {"type": "keyword"},
                        "muid": {"type": "keyword"},
                    }
                },
                "branch": {"type": "keyword"},
                "target_branch": {"type": "keyword"},
                "created_at": {"type": "date", "format": "date_time_no_millis"},
                "on_created_at": {"type": "date", "format": "date_time_no_millis"},
                "merged_at": {"type": "date", "format": "date_time_no_millis"},
                "updated_at": {"type": "date", "format": "date_time_no_millis"},
                "closed_at": {"type": "date", "format": "date_time_no_millis"},
                "state": {"type": "keyword"},
                "duration": {"type": "integer"},
                "mergeable": {"type": "keyword"},
                "labels": {"type": "keyword"},
                "assignees": {
                    "type": "nested",
                    "properties": {
                        "uid": {"type": "keyword"},
                        "muid": {"type": "keyword"},
                    },
                },
                "approval": {"type": "keyword"},
                "draft": {"type": "boolean"},
                "self_merged": {"type": "boolean"},
                "crawler_metadata": {
                    "properties": {
                        "last_commit_at": {"type": "date", "format": "date_time_no_millis"},
                        "last_post_at": {"type": "date", "format": "date_time_no_millis"},
                        "total_docs_posted": {"type": "integer"},
                        "total_changes_updated": {"type": "integer"},
                        "total_orphans_updated": {"type": "integer"},
                    }
                },
                "tasks_data": {
                    "properties": {
                        "tid": {"type": "keyword"},
                        "ttype": {"type": "keyword"},
                        "crawler_name": {"type": "keyword"},
                        "updated_at": {"type": "date", "format": "date_time_no_millis"},
                        "change_url": {"type": "keyword"},
                        "severity": {"type": "keyword"},
                        "priority": {"type": "keyword"},
                        "score": {"type": "integer"},
                        "url": {"type": "keyword"},
                        "title": {
                            "type": "text",
                            "fields": {"keyword": {"type": "keyword", "ignore_above": 8191}},
                        },
                        "_adopted": {"type": "boolean"},
                    }
                },
            }
        }
        settings = {"mappings": self.mapping}
        self.ic = self.es.indices
        if create:
            self.ic.create(index=self.index, ignore=400, body=settings)
        # The authors_histo aggregation fails in some contexts when the time
        # slice is large, with this error: "Must be less than or equal to:
        # [10000] but was [10001]. This limit can be set by changing the
        # [search.max_buckets] cluster level setting." This is an attempt to
        # mitigate the issue.
        cluster_settings = {"transient": {"search.max_buckets": 100000}}
        self.es.cluster.put_settings(body=cluster_settings)

    def update(self, source_it: List[Union[Change, Event]]) -> None:
        def gen(it):
            for _source in it:
                source = change_or_event_to_dict(_source)
                d = {}
                d["_index"] = self.index
                d["_op_type"] = "update"
                d["_id"] = source["id"]
                d["doc"] = source
                d["doc_as_upsert"] = True
                yield d

        bulk(self.es, gen(source_it))
        self.es.indices.refresh(index=self.index)

    def update_task_data(
        self,
        source_it: Union[
            List[TaskDataForEL],
            List[OrphanTaskDataForEL],
            List[AdoptedTaskDataForEL],
        ],
    ) -> Optional[BulkIndexError]:
        def gen(it):
            for _source in it:
                d = {}
                d["_index"] = self.index
                d["_op_type"] = "update"
                d["_id"] = _source._id
                d["doc"] = {}
                d["doc"].update({"id": _source._id})
                if isinstance(_source, TaskDataForEL):
                    d["doc"].update(
                        {"tasks_data": [asdict(td) for td in _source.tasks_data]}
                    )
                if isinstance(_source, OrphanTaskDataForEL):
                    d["doc"].update({"tasks_data": asdict(_source.task_data)})
                    d["doc"]["type"] = "OrphanTaskData"
                if isinstance(_source, AdoptedTaskDataForEL):
                    d["doc"].update({"tasks_data": asdict(_source.task_data)})
                d["doc_as_upsert"] = True
                yield d

        ret = None
        try:
            bulk(self.es, gen(source_it))
        except BulkIndexError as err:
            ret = err
        self.es.indices.refresh(index=self.index)
        return ret

    def compute_crawler_id_by_name(self, name, _type):
        return "crawler/%s/%s" % (_type, name)

    def get_task_crawler_metadata(self, name: str) -> Dict:
        try:
            ret = self.es.get(
                self.index, self.compute_crawler_id_by_name(name, "tasks_crawler")
            )
            return ret["_source"]["crawler_metadata"]
        except Exception:
            return {}

    def set_task_crawler_metadata(
        self, name: str, commit_date: datetime = None, push_infos: Dict = None
    ):
        metadata = {}
        if commit_date:
            metadata.update({"last_commit_at": commit_date})
        if push_infos:
            prev_metadata = self.get_task_crawler_metadata(name)
            metadata.update(
                {
                    "last_post_at": push_infos["last_post_at"],
                    "total_docs_posted": prev_metadata.get("total_docs_posted", 0)
                    + push_infos["total_docs_posted"],
                    "total_changes_updated": prev_metadata.get("total_changes_updated", 0)
                    + push_infos["total_changes_updated"],
                    "total_orphans_updated": prev_metadata.get("total_orphans_updated", 0)
                    + push_infos["total_orphans_updated"],
                }
            )
        body = {
            "doc": {"type": "TaskCrawlerDataCommit", "crawler_metadata": metadata},
            "doc_as_upsert": True,
        }
        ret = None
        try:
            self.es.update(
                self.index,
                self.compute_crawler_id_by_name(name, "tasks_crawler"),
                body=body,
            )
            self.es.indices.refresh(index=self.index)
        except Exception as err:
            ret = err
        return ret

    def delete_index(self):
        self.log.info("Deleting index: %s" % self.index)
        self.ic.delete(index=self.index)

    def delete_repository(self, repository_fullname):
        params = {"index": self.index}
        body = {
            "query": {
                "bool": {
                    "filter": {
                        "regexp": {
                            "repository_fullname": {"value": repository_fullname}
                        }
                    }
                }
            }
        }
        params["body"] = body
        self.es.delete_by_query(**params)
        self.es.indices.refresh(index=self.index)

    def get_last_updated(self, repository_fullname):
        params = {"index": self.index}
        body = {
            "sort": [{"updated_at": {"order": "desc"}}],
            "query": {
                "bool": {
                    "filter": [
                        {"term": {"type": "Change"}},
                        {
                            "regexp": {
                                "repository_fullname": {"value": repository_fullname}
                            }
                        },
                    ]
                }
            },
        }
        params["body"] = body
        try:
            res = self.es.search(**params)
        except Exception:
            return []
        ret = [r["_source"] for r in res["hits"]["hits"]]
        if not ret:
            return []
        return ret[0]

    def get_changes_by_url(self, change_urls, size):
        params = {
            "index": self.index,
            "body": {
                "size": size,
                "query": {
                    "bool": {
                        "filter": [
                            {"term": {"type": "Change"}},
                            {"terms": {"url": change_urls}},
                        ]
                    }
                },
            },
        }
        try:
            res = self.es.search(**params)
        except Exception:
            return []
        return [r["_source"] for r in res["hits"]["hits"]]

    def get_orphan_tds_by_change_urls(self, change_urls):
        assert len(change_urls) <= 50
        size = 5000  # Assuming not more than 100 TD items related to a change
        params = {
            "index": self.index,
            "body": {
                "size": size,
                "query": {
                    "bool": {
                        "must_not": {"exists": {"field": "tasks_data._adopted"}},
                        "filter": [
                            {"term": {"type": "OrphanTaskData"}},
                            {"terms": {"tasks_data.change_url": change_urls}},
                        ],
                    }
                },
            },
        }
        try:
            res = self.es.search(**params)
        except Exception:
            return []
        return [r["_source"] for r in res["hits"]["hits"]]

    def get_orphan_tds_and_declare_adpotion(self, changes_url):
        assert len(changes_url) <= 50
        tds = self.get_orphan_tds_by_change_urls(changes_url)
        if tds:
            adopted_tds = [
                AdoptedTaskDataForEL(
                    _id=td["id"],
                    task_data=AdoptedTaskData(_adopted=True),
                )
                for td in tds
            ]
            self.update_task_data(adopted_tds)
        return tds

    def update_changes_with_orphan_tds(self, mapping: Dict[str, str]):
        change_urls = list(mapping.keys())
        while change_urls:
            change_urls_to_process = change_urls[:50]
            change_urls = change_urls[50:]
            tds = self.get_orphan_tds_and_declare_adpotion(change_urls_to_process)
            # Group tds in buckets by change_url
            _map: Dict[str, List] = dict()
            for td in tds:
                _map.setdefault(td["tasks_data"]["change_url"], []).append(
                    td["tasks_data"]
                )
            # Create update docs to attach tds to matching changes
            to_update = []
            for change_url, tds in _map.items():
                to_update.append(
                    TaskDataForEL(
                        _id=mapping[change_url],
                        tasks_data=createELTaskData(tds),
                    )
                )
            self.update_task_data(to_update)

    def run_named_query(self, name, *args, **kwargs):
        if name not in queries.public_queries:
            raise UnknownQueryException("Unknown query: %s" % name)
        return getattr(queries, name)(self.es, self.index, *args, **kwargs)

    def get_indices(self):
        return [
            ind.replace(self.prefix, "")
            for ind in self.es.indices.get(self.prefix + "*")
        ]

    def iter_index(self):
        body = {"query": {"match_all": {}}}
        return scan(self.es, query=body, index=self.index, size=5000)

    def update_idents(self) -> None:
        import json

        bulk_size = 7500

        def get_obj_hash(obj: Dict) -> int:
            obj_json = json.dumps(obj, sort_keys=True)
            return hash(obj_json)

        def update_ident(dict_ident: Dict) -> Dict:
            dict_ident["muid"] = create_muid(dict_ident["uid"], self.idents_config)
            return dict_ident

        def _update_idents(obj: Dict) -> Tuple[Optional[Union[Change, Event]], bool]:
            prev_hash = get_obj_hash(obj)
            if obj["type"] == "Change":
                obj["author"] = update_ident(obj["author"])
                if "committer" in obj:
                    obj["committer"] = update_ident(obj["committer"])
                if "merged_by" in obj:
                    obj["merged_by"] = update_ident(obj["merged_by"])
                if "assignees" in obj:
                    obj["assignees"] = list(map(update_ident, obj["assignees"]))
                if "commits" in obj:
                    for commit in obj["commits"]:
                        commit["author"] = update_ident(commit["author"])
                        commit["committer"] = update_ident(commit["committer"])
            if obj["type"] in get_events_list():
                if "author" in obj:
                    obj["author"] = update_ident(obj["author"])
                if "on_author" in obj:
                    obj["on_author"] = update_ident(obj["on_author"])
            updated = not prev_hash == get_obj_hash(obj)
            if updated:
                return dict_to_change_or_event(obj), True
            else:
                return None, False

        def bulk_update(to_update: List) -> List:
            print("Updating %s objects ..." % len(to_update))
            self.update(to_update)
            return []

        to_update = []
        total_read = 0
        for _obj in self.iter_index():
            total_read += 1
            if total_read % bulk_size == 0:
                print("%s objects read from the database" % total_read)
            obj = _obj["_source"]
            obj, updated = _update_idents(obj)
            if updated:
                to_update.append(obj)
            if len(to_update) == bulk_size:
                to_update = bulk_update(to_update)
        bulk_update(to_update)
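

# For reference, the "update as upsert" bulk pattern used by ELmonocleDB.update()
# above, reduced to a standalone sketch against a plain Elasticsearch client.
# The helper name, index name and documents below are illustrative only; the
# bulk action shape (_op_type, doc, doc_as_upsert) mirrors what gen() yields.
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk


def upsert_docs(es: Elasticsearch, index: str, docs) -> None:
    def gen():
        for doc in docs:
            yield {
                "_index": index,
                "_op_type": "update",   # apply as a partial update ...
                "_id": doc["id"],
                "doc": doc,
                "doc_as_upsert": True,  # ... or insert if the _id does not exist
            }

    bulk(es, gen())
    es.indices.refresh(index=index)


# Example (assumes a local cluster and a throwaway index name):
# upsert_docs(Elasticsearch(["localhost:9200"]), "monocle.changes.demo",
#             [{"id": "c1", "type": "Change", "state": "OPEN"}])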
class BaseElasticsearchBackend(Base):
    """Base connection wrapper based on the ElasticSearch official library.

    It uses two entry points to configure the underlying connection:

    * ``transport_class``: the transport class from ``elasticsearch``. By
      default ``elasticsearch.transport.Transport``.
    * ``connection_class``: the connection class used by the transport class.
      It's undefined by default, as it is on the subclasses to provide one.

    If any of these elements is not defined, an ``ImproperlyConfigured`` error
    is raised when the backend tries to configure the client.
    """
    #: ElasticSearch transport class used by the client class to perform
    #: requests.
    transport_class = Transport
    #: ElasticSearch connection class used by the transport class to perform
    #: requests.
    connection_class = None

    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)

    # Server methods
    # ==============
    # The underlying client does not require index names to perform server
    # related queries, such as "ping" or "info". The connection wrapper acts
    # as a proxy for them.

    def ping(self, **kwargs):
        return self.client.ping(**kwargs)

    def info(self, **kwargs):
        return self.client.info(**kwargs)

    def put_script(self, lang, script_id, body, **kwargs):
        return self.client.put_script(lang, script_id, body, **kwargs)

    def get_script(self, lang, script_id, **kwargs):
        return self.client.get_script(lang, script_id, **kwargs)

    def delete_script(self, lang, script_id, **kwargs):
        return self.client.delete_script(lang, script_id, **kwargs)

    def put_template(self, template_id, body, **kwargs):
        return self.client.put_template(template_id, body, **kwargs)

    def get_template(self, template_id, body=None, **kwargs):
        return self.client.get_template(template_id, body, **kwargs)

    def delete_template(self, template_id=None, **kwargs):
        return self.client.delete_template(template_id, **kwargs)

    # Bulk methods
    # ============
    # The underlying client does not require index names, but they can be
    # given. As it makes sense not to force an index, developers are free to
    # use these methods as they want, as long as they are careful.

    def mget(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mget(body, index, doc_type, **kwargs)

    def bulk(self, body, index=None, doc_type=None, **kwargs):
        return self.client.bulk(body, index, doc_type, **kwargs)

    def msearch(self, body, index=None, doc_type=None, **kwargs):
        return self.client.msearch(body, index, doc_type, **kwargs)

    def mpercolate(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mpercolate(body, index, doc_type, **kwargs)

    # Scroll methods
    # ==============
    # The underlying client does not require an index to perform scroll.

    def scroll(self, scroll_id, **kwargs):
        return self.client.scroll(scroll_id, **kwargs)

    def clear_scroll(self, scroll_id, body=None, **kwargs):
        return self.client.clear_scroll(scroll_id, body, **kwargs)

    # Query methods
    # =============
    # The underlying client requires index names (or alias names) to perform
    # queries. The connection wrapper overrides these client methods to
    # automatically use the configured names (indices and/or aliases).

    def create(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.create(self.indices, doc_type, body, doc_id, **kwargs)

    def index(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.index(self.indices, doc_type, body, doc_id, **kwargs)

    def exists(self, doc_id, doc_type='_all', **kwargs):
        return self.client.exists(self.indices, doc_id, doc_type, **kwargs)

    def get(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get(self.indices, doc_id, doc_type, **kwargs)

    def get_source(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get_source(self.indices, doc_id, doc_type, **kwargs)

    def update(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.update(self.indices, doc_type, doc_id, body, **kwargs)

    def search(self, doc_type=None, body=None, **kwargs):
        return self.client.search(self.indices, doc_type, body, **kwargs)

    def search_shards(self, doc_type=None, **kwargs):
        return self.client.search_shards(self.indices, doc_type, **kwargs)

    def search_template(self, doc_type=None, body=None, **kwargs):
        return self.client.search_template(self.indices, doc_type, body, **kwargs)

    def explain(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.explain(self.indices, doc_type, doc_id, body, **kwargs)

    def delete(self, doc_type, doc_id, **kwargs):
        return self.client.delete(self.indices, doc_type, doc_id, **kwargs)

    def count(self, doc_type=None, body=None, **kwargs):
        return self.client.count(self.indices, doc_type, body, **kwargs)

    def delete_by_query(self, doc_type=None, body=None, **kwargs):
        return self.client.delete_by_query(self.indices, doc_type, body, **kwargs)

    def suggest(self, body, **kwargs):
        return self.client.suggest(body, self.indices, **kwargs)

    def percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.percolate(self.indices, doc_type, doc_id, body, **kwargs)

    def count_percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.count_percolate(self.indices, doc_type, doc_id, body, **kwargs)

    def mlt(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.mlt(self.indices, doc_type, doc_id, body, **kwargs)

    def termvector(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.termvector(self.indices, doc_type, doc_id, body, **kwargs)

    def mtermvectors(self, doc_type=None, body=None, **kwargs):
        return self.client.mtermvectors(self.indices, doc_type, body, **kwargs)

    def benchmark(self, doc_type=None, body=None, **kwargs):
        return self.client.benchmark(self.indices, doc_type, body, **kwargs)

    def abort_benchmark(self, name=None, **kwargs):
        return self.client.abort_benchmark(name, **kwargs)

    def list_benchmarks(self, doc_type=None, **kwargs):
        return self.client.list_benchmarks(self.indices, doc_type, **kwargs)
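

# A sketch of the kind of subclass the docstring above calls for: the wrapper
# only becomes usable once a connection_class is supplied. RequestsHttpConnection
# is a real connection class shipped with elasticsearch-py; the backend class
# name itself is illustrative and not part of the original code.
from elasticsearch.connection import RequestsHttpConnection


class RequestsElasticsearchBackend(BaseElasticsearchBackend):
    """Backend whose client performs HTTP requests through the requests library."""

    connection_class = RequestsHttpConnection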