async def update(self, filter_obj: IFilter, to_update: dict) -> int:
    """
    Update object in Storage by Query.

    :param IFilter filter_obj: filter object which describes what objects need to update
    :param dict to_update: dictionary with fields and values which should be updated
    :return: number of updated entries
    """

    def dict_to_source(__to_update: dict) -> str:
        """
        Convert __to_update dict into elasticsearch source representation.

        :param __to_update: dictionary with fields and values which should be updated
        :return: elasticsearch inline representation
        """

        def _value_converter(_value: Any) -> Any:
            """
            Convert value if it is necessary.

            Booleans become painless literals ("true"/"false"); datetimes are
            rendered with the storage's default date format; everything else
            is passed through unchanged.

            :param _value: raw field value from the update dict
            :return: value in a form suitable for inline painless source
            """
            if isinstance(_value, bool):
                return str(_value).lower()
            elif isinstance(_value, datetime):
                return _value.strftime(self._default_date_format)
            return _value

        # Each (field, value) pair is rendered through a type-specific
        # template (falling back to the default template) and joined into
        # one painless source string.
        return " ".join((self._inline_templates.get(
            type(value), self._default_template).substitute(
                FIELD_NAME=key,
                FIELD_VALUE=_value_converter(value))
                         for key, value in __to_update.items()))

    def _update(_ubq) -> UpdateByQueryResponse:
        """
        Perform Update by Query in separate thread.

        :param UpdateByQuery _ubq: UpdateByQuery instance
        :return: Response object of ElasticSearch DSL module
        """
        return _ubq.execute()

    _to_update = to_update.copy()  # Copy because it will be change below
    # NOTE: Important: call of the parent update method changes _to_update dict!
    await super().update(filter_obj, _to_update)  # Call the generic code
    ubq = UpdateByQuery(index=self._index, using=self._es_client)
    filter_by = self._query_converter.build(filter_obj)
    ubq = ubq.query(filter_by)
    source = dict_to_source(_to_update)
    ubq = ubq.script(source=source, lang=ESWords.PAINLESS)
    # execute() is blocking, so run it on the thread pool to keep the loop free
    result = await self._loop.run_in_executor(self._tread_pool_exec, _update,
                                              ubq)
    await self._refresh_index(
    )  # ensure that updated results will be ready immediately
    return result.updated
def test_complex_example():
    """query/filter/script chaining plus a post-hoc bool tweak serialize fully."""
    ubq = (
        UpdateByQuery()
        .query('match', title='python')
        .query(~Q('match', title='ruby'))
        .filter(Q('term', category='meetup') | Q('term', category='conference'))
        .script(source='ctx._source.likes += params.f', lang='painless',
                params={'f': 3})
    )
    ubq.query.minimum_should_match = 2

    expected = {
        'query': {
            'bool': {
                'filter': [
                    {
                        'bool': {
                            'should': [
                                {'term': {'category': 'meetup'}},
                                {'term': {'category': 'conference'}},
                            ]
                        }
                    }
                ],
                'must': [{'match': {'title': 'python'}}],
                'must_not': [{'match': {'title': 'ruby'}}],
                'minimum_should_match': 2,
            }
        },
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    assert ubq.to_dict() == expected
def update_by_query(self, **kwargs):
    """
    Update ElasticSearch entries matched by a multi_match query.

    :param kwargs: must contain ``fields``, ``query``, ``field_to_update``
        and ``new_value``
    :return: raw response dict of the update-by-query call, or ``None``
        when an error occurred (the error is printed)

    Example:
        >>> update_by_query(fields="publisher", query="oreilly",
        ...                 field_to_update="publisher", new_value="OnMedia")
    """
    try:
        client = Elasticsearch(
            hosts=ELASTIC_HOSTNAME
        )
        ubq = UpdateByQuery(
            using=client,
            index=self.book_index
        )
        search_fields = kwargs["fields"]
        query = kwargs["query"]
        field_to_update = kwargs["field_to_update"]
        new_value = kwargs["new_value"]
        # Pass the new value via script params instead of interpolating it
        # into the painless source: avoids script injection / breakage on
        # quotes, and lets Elasticsearch cache the compiled script.
        # (The field name still has to be part of the source text.)
        update_query = ubq.query(
            "multi_match",
            query=query,
            fields=search_fields
        ).script(
            source="ctx._source.{} = params.new_value".format(field_to_update),
            params={"new_value": new_value}
        )
        results = update_query.execute()._d_
        return results
    except Exception as ex:
        print(ex, flush=True)
def on_delete(self, status_id, user_id):
    """Flag every tweet document matching the status/user pair as deleted."""
    request = (
        UpdateByQuery(using=es, index="tweet*")
        .query("match", id_str__keyword=str(status_id))
        .query("match", user__id_str__keyword=str(user_id))
        .script(source="ctx._source.delete = true")
    )
    resp = request.execute()
    print(f"deleted {status_id} {user_id} {resp.to_dict()}")
    return
def _update_by_query(index, field_name, old_version, new_version):
    """Overwrite ``field_name`` with the new version's data on matching docs."""
    id_filter = {F'{field_name}__id': old_version.id}
    request = (
        UpdateByQuery(using=get_connection(), index=index)
        .filter('term', **id_filter)
        .script(
            source=F'ctx._source.{field_name} = params.new_value',
            params={'new_value': new_version.to_dict()},
        )
    )
    # Only force a synchronous refresh under test settings, so test
    # assertions can observe the update immediately.
    refresh = getattr(settings, 'TEST', False)
    request.params(refresh=refresh, conflicts='proceed').execute()
def test_params_being_passed_to_search(mock_client):
    """Request-level params set via .params() must reach the client call."""
    request = UpdateByQuery(using="mock").params(routing="42")
    request.execute()
    mock_client.update_by_query.assert_called_once_with(
        index=None,
        body={},
        routing="42",
    )
def test_params_being_passed_to_search(mock_client):
    """Params attached with .params() are forwarded to update_by_query."""
    search = UpdateByQuery(using='mock')
    search = search.params(routing='42')
    search.execute()
    expected_call = {'index': None, 'body': {}, 'routing': '42'}
    mock_client.update_by_query.assert_called_once_with(**expected_call)
def test_exclude():
    """exclude() wraps the clause in a filtered must_not bool."""
    ubq = UpdateByQuery().exclude("match", title="python")
    expected = {
        "query": {
            "bool": {
                "filter": [
                    {"bool": {"must_not": [{"match": {"title": "python"}}]}}
                ]
            }
        }
    }
    assert ubq.to_dict() == expected
def update_by_query(self, query, script_source):
    """
    Run an update-by-query built from ``query`` with the given script.

    :param query: query dict understood by ``update_from_dict``
    :param script_source: painless source to apply to each matching doc
    :return: True on success, False when any step raised
    """
    try:
        self.fix_read_only_allow_delete()
        request = (
            UpdateByQuery(using=self.es, index=self.index)
            .update_from_dict(query)
            .script(source=script_source)
        )
        request.execute()
    except Exception as err:
        print("Error: ", err)
        return False
    return True
def update_all(self, field, value, **kwargs):
    """
    Set ``field`` to ``value`` on every document in the index.

    :param field: name of the document field to overwrite
    :param value: new value to store in the field
    :param kwargs: optional ``logger`` (looked up but not otherwise used here)
    :return: True once the update-by-query request has completed
    """
    logger = kwargs.get("logger", None)

    # Abort when the target index is missing — nothing to update.
    if not self.es.indices.exists(index=self.index):
        exit()

    # Pass the value through script params instead of concatenating it into
    # the painless source: prevents script injection / breakage on quotes
    # and keeps the compiled script cacheable across calls.
    ubq = (
        UpdateByQuery(using=self.es, index=self.index)
        .update_from_dict({"query": {"match_all": {}}})
        .script(
            source="ctx._source." + field + " = params.new_value",
            params={"new_value": value},
        )
    )
    response = ubq.execute()
    return True
def test_complex_example():
    """A fully chained query/filter/script request serializes as expected."""
    request = UpdateByQuery()
    request = (
        request.query("match", title="python")
        .query(~Q("match", title="ruby"))
        .filter(Q("term", category="meetup") | Q("term", category="conference"))
        .script(source="ctx._source.likes += params.f", lang="painless",
                params={"f": 3})
    )
    # minimum_should_match can still be tweaked on the proxied bool query.
    request.query.minimum_should_match = 2

    expected = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "bool": {
                            "should": [
                                {"term": {"category": "meetup"}},
                                {"term": {"category": "conference"}},
                            ]
                        }
                    }
                ],
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"title": "ruby"}}],
                "minimum_should_match": 2,
            }
        },
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    assert request.to_dict() == expected
def test_ubq_to_dict():
    """to_dict() reflects query state, kwargs and constructor extras."""
    empty = UpdateByQuery()
    assert empty.to_dict() == {}

    queried = empty.query('match', f=42)
    assert queried.to_dict() == {"query": {"match": {'f': 42}}}
    assert queried.to_dict(size=10) == {
        "query": {"match": {'f': 42}},
        "size": 10,
    }

    assert UpdateByQuery(extra={"size": 5}).to_dict() == {"size": 5}
def run_update_by_query(esc, query, index):
    """
    Execute an update-by-query with up to 3 attempts and linear backoff.

    :param esc: Elasticsearch client/connection to use
    :param query: query dict understood by ``update_from_dict``
    :param index: target index name
    :return: the update-by-query response, or None if all attempts failed
    """
    ubq = UpdateByQuery(using=esc, index=index).update_from_dict(
        query).params(request_timeout=100)
    response = None
    finished = False
    count = 0
    while not finished and count < 3:
        try:
            count += 1
            response = ubq.execute()
            finished = True
        except Exception as e:
            print(e)
            # Back off a little longer after each failed attempt.
            sleep(10 * count)
    # Previously the response was assigned but dropped; return it so callers
    # can inspect the result (callers ignoring the return value still work).
    return response
def test_reverse():
    """from_dict must not mutate its input and must round-trip via to_dict."""
    original = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                },
                "query": {
                    "bool": {
                        "must": [{"match": {"title": "python"}}],
                        "must_not": [{"match": {"title": "ruby"}}],
                        "minimum_should_match": 2,
                    }
                },
            }
        },
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    snapshot = deepcopy(original)

    ubq = UpdateByQuery.from_dict(original)

    assert original == snapshot
    assert ubq.to_dict() == original
def set_elasticsearch_covid_outlays_to_zero(self, es_client, award_ids: list):
    """
    Sets 'total_covid_outlay' to zero in Elasticsearch (when not zero) for a
    provided list of award_ids.

    :param es_client: Client used to connect to Elasticsearch
    :param award_ids: List of award_ids to set outlays to zero in Elasticsearch
    """
    # Match awards whose outlay is non-zero (strictly above or below zero)
    # AND whose id is in the provided list.
    nonzero_outlay = ES_Q("range", **{"total_covid_outlay": {"gt": 0}}) | ES_Q(
        "range", **{"total_covid_outlay": {"lt": 0}})
    criteria = nonzero_outlay & ES_Q("terms", **{"award_id": award_ids})

    # Zero out the outlay on every matching document.
    request = (
        UpdateByQuery(using=es_client, index=settings.ES_AWARDS_WRITE_ALIAS)
        .script(source="ctx._source['total_covid_outlay'] = 0", lang="painless")
        .query(criteria)
    )
    result = request.execute()

    logger.info(
        "Updated {} Awards in Elasticsearch, setting 'total_covid_outlay' to zero"
        .format(result["updated"]))
def test_complex_example():
    """Chained query/filter/script plus bool tweak produce the full body."""
    built = (
        UpdateByQuery()
        .query('match', title='python')
        .query(~Q('match', title='ruby'))
        .filter(Q('term', category='meetup') | Q('term', category='conference'))
        .script(source='ctx._source.likes += params.f', lang='painless',
                params={'f': 3})
    )
    built.query.minimum_should_match = 2

    expected_query = {
        'bool': {
            'filter': [
                {
                    'bool': {
                        'should': [
                            {'term': {'category': 'meetup'}},
                            {'term': {'category': 'conference'}},
                        ]
                    }
                }
            ],
            'must': [{'match': {'title': 'python'}}],
            'must_not': [{'match': {'title': 'ruby'}}],
            'minimum_should_match': 2,
        }
    }
    expected_script = {
        'source': 'ctx._source.likes += params.f',
        'lang': 'painless',
        'params': {'f': 3},
    }
    assert built.to_dict() == {'query': expected_query, 'script': expected_script}
def test_overwrite_script():
    """A second .script() call replaces the previous script entirely."""
    request = UpdateByQuery().script(
        source='ctx._source.likes += params.f',
        lang='painless',
        params={'f': 3},
    )
    assert request.to_dict() == {
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        }
    }

    request = request.script(source='ctx._source.likes++')
    assert request.to_dict() == {
        'script': {
            'source': 'ctx._source.likes++'
        }
    }
def test_exclude():
    """exclude() nests the clause under a filtered bool must_not."""
    excluded = UpdateByQuery().exclude('match', title='python')
    must_not_clause = {'bool': {'must_not': [{'match': {'title': 'python'}}]}}
    assert excluded.to_dict() == {
        'query': {'bool': {'filter': [must_not_clause]}}
    }
def handler(event, context):
    """
    Lambda entry point: null out ``photoId`` on every car document that
    references the given photo.

    :param event: dict carrying the ``photo_id`` to censor
    :param context: Lambda context object (unused)
    :return: HTTP-style response dict (200 on success, 500 on failure)
    """
    es_endpoint = os.getenv('ELASTICSEARCH_SERVICE_ENDPOINT')
    photo_id = event["photo_id"]

    # Connect to Elasticsearch service
    try:
        es = es_client.get_elasticsearch_client(es_endpoint)
    except Exception:
        logging.exception('Failed to connect to Elasticsearch cluster')
        return response(500, {
            'error': 'elasticsearch-client-connection',
            'message': 'Elasticsearch service is not available'
        })

    try:
        update = UpdateByQuery(using=es).index(cars_index_name)
        update = update.filter('term', photoId=photo_id)
        # Null the photo reference via script params (no value interpolation).
        update = update.script(source='ctx._source.photoId = params.nullPhoto',
                               params={'nullPhoto': None})
        update.execute()
        # Fixed misspelled user-facing messages ("seccessfull"/"cenzor");
        # machine-readable error codes are kept stable for API clients.
        return response(200, {'result': 'Update successful'})
    except Exception:
        logging.exception('Failed to censor photo')
        return response(500, {
            'error': 'car-photo-cenzor-fail',
            'message': 'Failed to censor requested photo'
        })
def test_reverse():
    """Round-tripping a dict through from_dict/to_dict leaves it intact."""
    source_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'should': [
                            {'term': {'category': 'meetup'}},
                            {'term': {'category': 'conference'}},
                        ]
                    }
                },
                'query': {
                    'bool': {
                        'must': [{'match': {'title': 'python'}}],
                        'must_not': [{'match': {'title': 'ruby'}}],
                        'minimum_should_match': 2,
                    }
                },
            }
        },
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    untouched_copy = deepcopy(source_body)

    parsed = UpdateByQuery.from_dict(source_body)

    # from_dict must not mutate its argument, and to_dict must round-trip.
    assert source_body == untouched_copy
    assert parsed.to_dict() == source_body
def test_overwrite_script():
    """Calling .script() again fully replaces the earlier script body."""
    first = UpdateByQuery().script(
        source="ctx._source.likes += params.f",
        lang="painless",
        params={"f": 3},
    )
    expected_first = {
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        }
    }
    assert first.to_dict() == expected_first

    second = first.script(source="ctx._source.likes++")
    assert second.to_dict() == {"script": {"source": "ctx._source.likes++"}}
def test_overwrite_script():
    """The latest .script() call wins; earlier lang/params are dropped."""
    with_params = UpdateByQuery().script(
        source='ctx._source.likes += params.f',
        lang='painless',
        params={'f': 3},
    )
    assert with_params.to_dict() == {
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        }
    }

    replaced = with_params.script(source='ctx._source.likes++')
    assert replaced.to_dict() == {'script': {'source': 'ctx._source.likes++'}}
def save_choices(choice_data):
    """
    Record a student's ranked project choices on the mentors index.

    :param choice_data: JWT-encoded payload shaped like::

        {
            "student_id": "rec03s7tmgmxVlDZu",
            "votes": [
                {"proj_id": "recjcesxayRUS9kIH", "choice": 1},
                {"proj_id": "recfRVcYvECgJ0BlY", "choice": 2},
                {"proj_id": "recicI3vLpk1uPV3X", "choice": 3},
                {"proj_id": "recJnr93NhrWClUX2", "choice": 4},
                {"proj_id": "rec4PQBsmPrR3Eeiu", "choice": 5},
            ]
        }
    """
    try:
        data = decode(choice_data, current_app.jwt_key, algorithms=["HS256"])
    except exceptions.DecodeError:
        raise Unauthorized("Something is wrong with your JWT Encoding.")

    responses = []
    for vote in data["votes"]:
        selection = {
            "student_id": data["student_id"],
            "choice": vote["choice"],
        }
        request = (
            UpdateByQuery(
                using=current_app.elasticsearch, index="mentors_index"
            )
            .query("term", id=vote["proj_id"])
            .script(
                source=
                'if(!ctx._source.containsKey("listStudentsSelected")){ ctx._source.listStudentsSelected = new ArrayList();} ctx._source.listStudentsSelected.add(params.student);ctx._source.numStudentsSelected++;',
                params={"student": selection},
            )
        )
        try:
            responses.append(request.execute().to_dict())
        except RequestError as e:
            raise InternalServerError(
                "Something went wrong with the update, please try again.")

    num_updated = sum(resp["updated"] for resp in responses)
    return json.dumps({"ok": True, "updated": num_updated})
def remove_from_field(doc_type_name, field_name, field_value):
    """Remove a value from all documents in the doc_type's index."""
    doc_type = next(
        cls for cls in get_doc_types() if cls.__name__ == doc_type_name
    )

    # Painless: drop the first occurrence of params.value from the list field.
    script = (
        f"if (ctx._source.{field_name}.contains(params.value)) {{"
        f"ctx._source.{field_name}.remove(ctx._source.{field_name}.indexOf(params.value))"
        f"}}"
    )

    update = (
        UpdateByQuery(using=es7_client(), index=doc_type._index._name)
        .filter("term", **{field_name: field_value})
        .script(source=script, params={"value": field_value}, conflicts="proceed")
    )

    # refresh index to ensure search fetches all matches
    doc_type._index.refresh()

    update.execute()
def test_reverse():
    """from_dict leaves the input dict untouched and to_dict round-trips it."""
    filter_part = {
        'bool': {
            'should': [
                {'term': {'category': 'meetup'}},
                {'term': {'category': 'conference'}},
            ]
        }
    }
    query_part = {
        'bool': {
            'must': [{'match': {'title': 'python'}}],
            'must_not': [{'match': {'title': 'ruby'}}],
            'minimum_should_match': 2,
        }
    }
    body = {
        'query': {'filtered': {'filter': filter_part, 'query': query_part}},
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    body_before = deepcopy(body)

    parsed = UpdateByQuery.from_dict(body)

    assert body == body_before
    assert parsed.to_dict() == body
EXTRACTED_CASES_FILE = 'cases.json' with open( EXTRACTED_CASES_FILE, 'r' ) as f: #store your cases.json file in the same directory as this script extracted_acts = ast.literal_eval(f.read()) with open('files.txt', 'r') as f: files = f.readlines() ES_HOST = {"host": "127.0.0.1", "port": 9200} # client1 = Elasticsearch(hosts=[ES_HOST],index="test") # client2 = Elasticsearch(hosts=[ES_HOST],index="index") client = Elasticsearch(hosts=[ES_HOST]) s = Search(using=client) ubq = UpdateByQuery(using=client) start = 8811 x = "" for i in range(start, len(files)): file = files[i] case = file.split('.')[0] print(i, case, '\n') if file.strip() not in extracted_acts or extracted_acts[file.strip()] == [ "" ]: print('ignored') i += 1 continue x = "curl -XPOST \"localhost:9200/test/docs/" + str(i + 1) x += '/_update\" -H \'Content-Type: application/json\' -d\' { "doc": {"acts" :' + str(
def test_ubq_to_dict():
    """to_dict() reflects query state, call kwargs and constructor extras."""
    blank = UpdateByQuery()
    assert blank.to_dict() == {}

    queried = blank.query("match", f=42)
    assert queried.to_dict() == {"query": {"match": {"f": 42}}}
    assert queried.to_dict(size=10) == {
        "query": {"match": {"f": 42}},
        "size": 10,
    }

    assert UpdateByQuery(extra={"size": 5}).to_dict() == {"size": 5}

    with_extra_q = UpdateByQuery(
        extra={"extra_q": Q("term", category="conference")}
    )
    assert with_extra_q.to_dict() == {
        "extra_q": {"term": {"category": "conference"}}
    }
def test_from_dict_doesnt_need_query():
    """A script-only body (no query) is valid input for from_dict."""
    parsed = UpdateByQuery.from_dict({'script': {'source': 'test'}})
    expected = {'script': {'source': 'test'}}
    assert parsed.to_dict() == expected
def test_from_dict_doesnt_need_query():
    """from_dict accepts a body containing only a script section."""
    script_only = {"script": {"source": "test"}}
    assert UpdateByQuery.from_dict(script_only).to_dict() == {
        "script": {"source": "test"}
    }
def test_from_dict_doesnt_need_query():
    """A query section is optional when parsing an update-by-query body."""
    body = {'script': {'source': 'test'}}
    parsed = UpdateByQuery.from_dict(body)
    assert parsed.to_dict() == {'script': {'source': 'test'}}
def handle(event: EntityUpdated):
    """
    Propagate an entity rename to every scholarship document whose
    ``entity.name`` still carries the old code.

    :param event: EntityUpdated event with ``old_code``, ``code`` and ``name``
    """
    # Pass the new values through script params rather than f-string
    # interpolation into the painless source: avoids script injection /
    # syntax breakage when values contain quotes, and lets Elasticsearch
    # cache the compiled script across events.
    (
        UpdateByQuery(index=Scholarship.Index.name)
        .query("match", **{"entity.name": event.old_code})
        .script(
            source="ctx._source.entity.code = params.code; "
                   "ctx._source.entity.name = params.name",
            lang="painless",
            params={"code": event.code, "name": event.name},
        )
        .execute()
    )
def test_ubq_starts_with_no_query():
    """A freshly constructed UpdateByQuery holds no proxied query yet."""
    fresh = UpdateByQuery()
    assert fresh.query._proxied is None
es = Elasticsearch(hosts=[{"host": "elasticsearch", "port": 9200}]) with open("/scripts/template.json", "r") as f: print("update template") es.indices.put_template(name="tweet", body=json.load(f)) follow = [] conf = {} with open("/conf/config.yaml") as f: conf = yaml.safe_load(f) for l in conf["list"].keys(): for user in conf["list"][l]: id_str = user["id"] username = user["name"] follow.append(id_str) ubq = UpdateByQuery(using=es, index="tweet*") query = (ubq.query("match", user__id_str__keyword=id_str).script( source=""" int i = 0; int found = 0; if (ctx._source.list_name == null){ ctx._source.list_name = []; } for(i = 0; i < ctx._source.list_name.size(); i++){ if (ctx._source.list_name[i] == params.list_name){ found = 1; break; } } if (found == 0){ ctx._source.list_name.add(params.list_name);