def handler(event, context):
    """Censor a photo by nulling out ``photoId`` on every matching car document.

    Args:
        event: Lambda event dict; must contain ``photo_id``.
        context: Lambda context object (unused).

    Returns:
        An HTTP-style response dict (via ``response``): 200 on success,
        500 on connection or update failure.
    """
    es_endpoint = os.getenv('ELASTICSEARCH_SERVICE_ENDPOINT')
    photo_id = event["photo_id"]

    # Connect to Elasticsearch service
    try:
        es = es_client.get_elasticsearch_client(es_endpoint)
    except Exception:
        logging.exception('Failed to connect to Elasticsearch cluster')
        return response(500, {
            'error': 'elasticsearch-client-connection',
            'message': 'Elasticsearch service is not available'
        })

    try:
        update = UpdateByQuery(using=es).index(cars_index_name)
        update = update.filter('term', photoId=photo_id)
        # Null out the photo reference via a painless script parameter.
        update = update.script(source='ctx._source.photoId = params.nullPhoto',
                               params={'nullPhoto': None})
        update.execute()
        # Fixed typo in the user-facing message ("seccessfull").
        return response(200, {'result': 'Update successful'})
    except Exception:
        logging.exception('Failed to censor photo')
        return response(500, {
            # NOTE(review): typo'd error code kept as-is — clients may match on it.
            'error': 'car-photo-cenzor-fail',
            # Fixed typo in the user-facing message ("cenzor").
            'message': 'Failed to censor requested photo'
        })
def test_params_being_passed_to_search(mock_client):
    """Extra query params set via .params() must reach client.update_by_query."""
    query = UpdateByQuery(using="mock").params(routing="42")
    query.execute()
    mock_client.update_by_query.assert_called_once_with(
        index=None, body={}, routing="42"
    )
def test_params_being_passed_to_search(mock_client):
    """Routing set through .params() is forwarded to the low-level client call."""
    ubq = UpdateByQuery(using='mock')
    ubq.params(routing='42').execute()
    expected = {'index': None, 'body': {}, 'routing': '42'}
    mock_client.update_by_query.assert_called_once_with(**expected)
def update_by_query(self, query, script_source):
    """Run an update-by-query with the given script against ``self.index``.

    Args:
        query: Query body dict in ``update_from_dict`` format.
        script_source: Painless script source applied to each matching doc.

    Returns:
        True on success, False if the request raised.
    """
    try:
        # Clear any read-only block (e.g. flood-stage watermark) before writing.
        self.fix_read_only_allow_delete()
        ubq = (
            UpdateByQuery(using=self.es, index=self.index)
            .update_from_dict(query)
            .script(source=script_source)
        )
        ubq.execute()
    except Exception:
        # Fixed: log the full traceback instead of a bare print().
        logging.exception("update_by_query failed")
        return False
    return True
def remove_from_field(doc_type_name, field_name, field_value):
    """Remove a value from all documents in the doc_type's index.

    Args:
        doc_type_name: Name of a document class returned by ``get_doc_types()``.
        field_name: List-valued field to remove the value from.
        field_value: The value to remove.

    Raises:
        ValueError: If no doc type named ``doc_type_name`` exists.
    """
    doc_type = next(
        (cls for cls in get_doc_types() if cls.__name__ == doc_type_name), None
    )
    if doc_type is None:
        # Fixed: raise a clear error instead of leaking StopIteration.
        raise ValueError(f"Unknown doc type: {doc_type_name}")

    # Painless script: delete the first occurrence of the value, if present.
    # NOTE(review): field_name is interpolated into the script source — assumed
    # to come from trusted doc-type definitions, never from user input.
    script = (
        f"if (ctx._source.{field_name}.contains(params.value)) {{"
        f"ctx._source.{field_name}.remove(ctx._source.{field_name}.indexOf(params.value))"
        f"}}"
    )

    update = UpdateByQuery(using=es7_client(), index=doc_type._index._name)
    update = update.filter("term", **{field_name: field_value})
    update = update.script(source=script, params={"value": field_value}, conflicts="proceed")

    # refresh index to ensure search fetches all matches
    doc_type._index.refresh()
    update.execute()
def update_all(self, field, value, **kwargs):
    """Set ``field`` to ``value`` on every document in ``self.index``.

    Args:
        field: Name of the document field to set.
        value: New value for the field.
        **kwargs: Optional ``logger`` (kept for backward compatibility; unused).

    Returns:
        True when the update request was issued, False if the index is missing.
    """
    logger = kwargs.get("logger", None)

    # Fixed: return False instead of killing the whole process with exit().
    if not self.es.indices.exists(index=self.index):
        return False

    # Fixed: pass the value as a script parameter instead of concatenating it
    # (quoted) into the painless source — avoids quoting bugs / script injection
    # and generalizes beyond plain string values.
    # NOTE(review): field is still interpolated — assumed to be a trusted name.
    ubq = (
        UpdateByQuery(using=self.es, index=self.index)
        .update_from_dict({"query": {"match_all": {}}})
        .script(source="ctx._source." + field + " = params.value",
                params={"value": value})
    )
    ubq.execute()
    return True
def run_update_by_query(esc, query, index):
    """Run an update-by-query with up to 3 attempts and increasing backoff.

    Args:
        esc: Elasticsearch client / connection alias for ``using``.
        query: Query body dict in ``update_from_dict`` format.
        index: Target index name.

    Returns:
        The ``execute()`` response on success, or None if all attempts failed.
    """
    ubq = (
        UpdateByQuery(using=esc, index=index)
        .update_from_dict(query)
        .params(request_timeout=100)
    )
    for attempt in range(1, 4):
        try:
            # Fixed: the response is now returned instead of being discarded.
            return ubq.execute()
        except Exception as e:
            print(e)
            # Back off longer after each failed attempt: 10s, 20s, 30s.
            sleep(10 * attempt)
    return None