Ejemplo n.º 1
0
    async def update(self, filter_obj: IFilter, to_update: dict) -> int:
        """
        Update the Storage entries selected by a filter.

        The generic parent ``update`` is awaited first on a copy of
        ``to_update``; that copy is then rendered into an inline script and
        executed as an Update By Query in the thread pool executor, after
        which the index is refreshed.

        :param IFilter filter_obj: filter object which describes what objects need to update
        :param dict to_update: dictionary with fields and values which should be updated
        :return: number of updated entries
        """
        def convert_value(raw: Any) -> Any:
            """
            Prepare one value for substitution into a script template.

            :param raw: value taken from the update dictionary
            :return: lower-cased string for a bool, formatted string for a
                     datetime, the value itself otherwise
            """
            # The bool check stays ahead of the datetime check.
            if isinstance(raw, bool):
                return str(raw).lower()
            if isinstance(raw, datetime):
                return raw.strftime(self._default_date_format)
            return raw

        def render_script(fields: dict) -> str:
            """
            Render the update dictionary as an elasticsearch inline script.

            :param fields: dictionary with fields and values which should be updated
            :return: the per-field statements joined with single spaces
            """
            statements = []
            for name, value in fields.items():
                # The template is chosen by the exact type of the value.
                template = self._inline_templates.get(
                    type(value), self._default_template)
                statements.append(template.substitute(
                    FIELD_NAME=name, FIELD_VALUE=convert_value(value)))
            return " ".join(statements)

        # Work on a copy: the parent update below modifies the dict it gets.
        pending = to_update.copy()
        await super().update(filter_obj, pending)  # Call the generic code

        request = UpdateByQuery(index=self._index, using=self._es_client)
        request = request.query(self._query_converter.build(filter_obj))
        request = request.script(source=render_script(pending),
                                 lang=ESWords.PAINLESS)

        # execute() is blocking, so it is handed to the executor.
        outcome = await self._loop.run_in_executor(self._tread_pool_exec,
                                                   request.execute)

        # ensure that updated results will be ready immediately
        await self._refresh_index()
        return outcome.updated
def test_complex_example():
    """Chained query, filter and script calls serialize to the expected body."""
    ubq = UpdateByQuery()
    ubq = ubq.query('match', title='python')
    ubq = ubq.query(~Q('match', title='ruby'))
    ubq = ubq.filter(Q('term', category='meetup') | Q('term', category='conference'))
    ubq = ubq.script(source='ctx._source.likes += params.f', lang='painless', params={'f': 3})

    ubq.query.minimum_should_match = 2

    category_clause = {
        'bool': {
            'should': [
                {'term': {'category': 'meetup'}},
                {'term': {'category': 'conference'}},
            ],
        },
    }
    expected = {
        'query': {
            'bool': {
                'filter': [category_clause],
                'must': [{'match': {'title': 'python'}}],
                'must_not': [{'match': {'title': 'ruby'}}],
                'minimum_should_match': 2,
            },
        },
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    assert expected == ubq.to_dict()
Ejemplo n.º 3
0
    def update_by_query(self, **kwargs):
        """
        Update every document of the book index matched by a multi-field search.

        A ``multi_match`` query selects the documents and a script assigns
        ``new_value`` to ``field_to_update`` on each of them.  The new value is
        handed to the script through ``params`` instead of being spliced into
        the script source, so a value containing quotes or backslashes can
        neither break nor alter the script.

        :param kwargs: required keys are ``fields`` (field name(s) to search),
            ``query`` (text to search for), ``field_to_update`` (name of the
            field to overwrite; it is inserted verbatim into the script source,
            so it must come from trusted code) and ``new_value`` (value to
            store)
        :return: the update-by-query response as a plain dict, or ``None``
            when any error occurred (the error is printed)

        Example:
            >>> update_by_query(fields="publisher", query="oreilly", field_to_update="publisher", new_value="OnMedia")
        """
        try:
            client = Elasticsearch(
                hosts=ELASTIC_HOSTNAME
            )
            ubq = UpdateByQuery(
                using=client,
                index=self.book_index
            )

            search_fields = kwargs["fields"]
            query = kwargs["query"]
            field_to_update = kwargs["field_to_update"]
            new_value = kwargs["new_value"]

            update_query = ubq.query(
                "multi_match",
                query=query,
                fields=search_fields
            ).script(
                # Only the field name is part of the source; the value travels
                # as a script parameter and is never parsed as script code.
                source="ctx._source.{} = params.new_value".format(field_to_update),
                params={"new_value": new_value}
            )
            # to_dict() is the public accessor for the raw response body.
            return update_query.execute().to_dict()
        except Exception as ex:
            # Best-effort by design: report the failure and return None.
            print(ex, flush=True)
            return None
Ejemplo n.º 4
0
 def on_delete(self, status_id, user_id):
     """
     Mark the documents of a deleted status in the ``tweet*`` indices.

     Documents matching both the status id and the user id get
     ``ctx._source.delete = true``; the response is printed.

     :param status_id: id of the deleted status (stringified for the query)
     :param user_id: id of the status' author (stringified for the query)
     :return: None
     """
     request = UpdateByQuery(using=es, index="tweet*")
     request = request.query("match", id_str__keyword=str(status_id))
     request = request.query("match", user__id_str__keyword=str(user_id))
     request = request.script(source="ctx._source.delete = true")

     resp = request.execute()
     print(f"deleted {status_id} {user_id} {resp.to_dict()}")
Ejemplo n.º 5
0
 def _update_by_query(index, field_name, old_version, new_version):
     """
     Replace an embedded object in every document that references its old version.

     Documents of ``index`` whose ``<field_name>__id`` term equals
     ``old_version.id`` get ``field_name`` overwritten with
     ``new_version.to_dict()``.  Version conflicts are skipped
     (``conflicts='proceed'``); the index is refreshed only when
     ``settings.TEST`` is set.
     """
     ubq = UpdateByQuery(using=get_connection(), index=index)

     id_term = {f'{field_name}__id': old_version.id}
     ubq = ubq.filter('term', **id_term)

     script_source = f'ctx._source.{field_name} = params.new_value'
     ubq = ubq.script(source=script_source,
                      params={'new_value': new_version.to_dict()})

     refresh = getattr(settings, 'TEST', False)
     ubq = ubq.params(refresh=refresh, conflicts='proceed')
     ubq.execute()
def test_params_being_passed_to_search(mock_client):
    """Request params set via ``params()`` are forwarded to the client call."""
    request = UpdateByQuery(using="mock").params(routing="42")
    request.execute()

    expected_call = {"index": None, "body": {}, "routing": "42"}
    mock_client.update_by_query.assert_called_once_with(**expected_call)
def test_params_being_passed_to_search(mock_client):
    """``routing`` given through ``params()`` reaches ``update_by_query``."""
    ubq = UpdateByQuery(using='mock')
    routed = ubq.params(routing='42')
    routed.execute()

    client_call = mock_client.update_by_query
    client_call.assert_called_once_with(index=None, body={}, routing='42')
def test_exclude():
    """``exclude`` adds the negated query as a filter clause."""
    ubq = UpdateByQuery().exclude("match", title="python")

    negated_match = {"bool": {"must_not": [{"match": {"title": "python"}}]}}
    expected = {"query": {"bool": {"filter": [negated_match]}}}

    assert expected == ubq.to_dict()
Ejemplo n.º 9
0
    def update_by_query(self, query, script_source):
        """
        Run ``script_source`` over every document selected by ``query``.

        :param query: request body dict handed to ``update_from_dict``
        :param script_source: source of the script applied to each match
        :return: ``True`` when the request was executed, ``False`` when any
            step raised (the error is printed)
        """
        try:
            self.fix_read_only_allow_delete()
            request = UpdateByQuery(using=self.es, index=self.index)
            request = request.update_from_dict(query)
            request = request.script(source=script_source)
            request.execute()
        except Exception as err:
            print("Error: ", err)
            return False
        else:
            return True
Ejemplo n.º 10
0
    def update_all(self, field, value, **kwargs):
        """
        Set ``field`` to ``value`` on every document of ``self.index``.

        The update is a single update-by-query request with a ``match_all``
        query.  ``value`` is passed to the script as a parameter rather than
        being concatenated into the script source, so quotes or backslashes
        in it cannot break or alter the script, and non-string values no
        longer raise ``TypeError``.

        :param field: name of the field to overwrite; it is inserted verbatim
            into the script source, so it must come from trusted code
        :param value: value to store in ``field``
        :param kwargs: accepted for backward compatibility (callers may pass
            ``logger``); currently unused
        :return: ``True`` once the update request has been executed
        :raises SystemExit: when ``self.index`` does not exist
        """
        # Check index exists
        if not self.es.indices.exists(index=self.index):
            # Same effect as the interactive ``exit()`` helper, without
            # depending on the ``site`` module having injected it.
            raise SystemExit

        ubq = (
            UpdateByQuery(using=self.es, index=self.index)
            .update_from_dict({"query": {"match_all": {}}})
            .script(
                source="ctx._source." + field + " = params.value",
                params={"value": value},
            )
        )
        ubq.execute()
        return True
def test_complex_example():
    """A must, must_not, filter and script combination serializes as expected."""
    not_ruby = ~Q("match", title="ruby")
    meetup_or_conference = Q("term", category="meetup") | Q("term", category="conference")

    ubq = (
        UpdateByQuery()
        .query("match", title="python")
        .query(not_ruby)
        .filter(meetup_or_conference)
        .script(
            source="ctx._source.likes += params.f",
            lang="painless",
            params={"f": 3},
        )
    )
    ubq.query.minimum_should_match = 2

    expected_query = {
        "bool": {
            "filter": [
                {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                }
            ],
            "must": [{"match": {"title": "python"}}],
            "must_not": [{"match": {"title": "ruby"}}],
            "minimum_should_match": 2,
        }
    }
    expected_script = {
        "source": "ctx._source.likes += params.f",
        "lang": "painless",
        "params": {"f": 3},
    }
    assert {"query": expected_query, "script": expected_script} == ubq.to_dict()
Ejemplo n.º 12
0
def test_ubq_to_dict():
    """``to_dict`` reflects the query, per-call overrides and ``extra``."""
    blank = UpdateByQuery()
    assert {} == blank.to_dict()

    matching = blank.query('match', f=42)
    query_only = {"query": {"match": {'f': 42}}}
    assert query_only == matching.to_dict()

    # Keyword arguments to to_dict() are merged into the serialized body.
    with_size = {"query": {"match": {'f': 42}}, "size": 10}
    assert with_size == matching.to_dict(size=10)

    from_extra = UpdateByQuery(extra={"size": 5})
    assert {"size": 5} == from_extra.to_dict()
Ejemplo n.º 13
0
    def run_update_by_query(esc, query, index):
        """
        Execute an update-by-query request, retrying on failure.

        The request is attempted up to three times with a 100 second request
        timeout.  After a failed attempt the error is printed and the function
        waits ``10 * attempt`` seconds before retrying.  No wait follows the
        final attempt, since nothing is retried after it.

        :param esc: Elasticsearch client used to send the request
        :param query: request body dict handed to ``update_from_dict``
        :param index: index (or indices) the request runs against
        :return: None, whether or not any attempt succeeded
        """
        max_attempts = 3

        ubq = UpdateByQuery(using=esc, index=index).update_from_dict(
            query).params(request_timeout=100)

        for attempt in range(1, max_attempts + 1):
            try:
                ubq.execute()
                return
            except Exception as e:
                # Best-effort by design: report the failure and retry.
                print(e)
                if attempt < max_attempts:
                    sleep(10 * attempt)
def test_reverse():
    """``from_dict`` leaves its input untouched and ``to_dict`` reproduces it."""
    category_filter = {
        "bool": {
            "should": [
                {"term": {"category": "meetup"}},
                {"term": {"category": "conference"}},
            ]
        }
    }
    title_query = {
        "bool": {
            "must": [{"match": {"title": "python"}}],
            "must_not": [{"match": {"title": "ruby"}}],
            "minimum_should_match": 2,
        }
    }
    body = {
        "query": {"filtered": {"filter": category_filter, "query": title_query}},
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    snapshot = deepcopy(body)

    ubq = UpdateByQuery.from_dict(body)

    assert body == snapshot
    assert body == ubq.to_dict()
    def set_elasticsearch_covid_outlays_to_zero(self, es_client,
                                                award_ids: list):
        """
        Reset non-zero 'total_covid_outlay' values in Elasticsearch to zero
        for the given awards and log how many documents were updated.

        :param es_client: Client used to connect to Elasticsearch
        :param award_ids: List of award_ids to set outlays to zero in Elasticsearch
        """
        # "Not zero" is expressed as: greater than zero OR less than zero.
        positive_outlay = ES_Q("range", total_covid_outlay={"gt": 0})
        negative_outlay = ES_Q("range", total_covid_outlay={"lt": 0})
        in_requested_awards = ES_Q("terms", award_id=award_ids)
        query = (positive_outlay | negative_outlay) & in_requested_awards

        # Sets total_covid_outlay to zero based on the above Query criteria
        ubq = UpdateByQuery(using=es_client,
                            index=settings.ES_AWARDS_WRITE_ALIAS)
        ubq = ubq.script(source="ctx._source['total_covid_outlay'] = 0",
                         lang="painless")
        ubq = ubq.query(query)

        response = ubq.execute()
        message = (
            "Updated {} Awards in Elasticsearch, setting 'total_covid_outlay' to zero"
            .format(response["updated"]))
        logger.info(message)
Ejemplo n.º 16
0
def test_complex_example():
    """Query, negated query, filter and script end up in one request body."""
    base = UpdateByQuery()
    with_title = base.query('match', title='python')
    without_ruby = with_title.query(~Q('match', title='ruby'))
    filtered = without_ruby.filter(
        Q('term', category='meetup') | Q('term', category='conference'))
    ubq = filtered.script(
        source='ctx._source.likes += params.f', lang='painless', params={'f': 3})

    ubq.query.minimum_should_match = 2

    should_terms = [
        {'term': {'category': 'meetup'}},
        {'term': {'category': 'conference'}},
    ]
    expected = {
        'query': {
            'bool': {
                'filter': [{'bool': {'should': should_terms}}],
                'must': [{'match': {'title': 'python'}}],
                'must_not': [{'match': {'title': 'ruby'}}],
                'minimum_should_match': 2,
            }
        },
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    assert expected == ubq.to_dict()
def test_overwrite_script():
    """A second ``script`` call replaces the earlier script completely."""
    ubq = UpdateByQuery().script(
        source='ctx._source.likes += params.f', lang='painless', params={'f': 3})
    parametrized = {
        'source': 'ctx._source.likes += params.f',
        'lang': 'painless',
        'params': {'f': 3},
    }
    assert {'script': parametrized} == ubq.to_dict()

    # Neither 'lang' nor 'params' of the first script may survive.
    ubq = ubq.script(source='ctx._source.likes++')
    assert {'script': {'source': 'ctx._source.likes++'}} == ubq.to_dict()
def test_exclude():
    """``exclude`` serializes as a ``must_not`` wrapped in a filter."""
    ubq = UpdateByQuery()
    ubq = ubq.exclude('match', title='python')

    must_not_python = {'bool': {'must_not': [{'match': {'title': 'python'}}]}}
    expected = {'query': {'bool': {'filter': [must_not_python]}}}
    assert expected == ubq.to_dict()
Ejemplo n.º 19
0
def test_exclude():
    """Excluding a match yields a filter holding the negated match."""
    excluded = UpdateByQuery().exclude('match', title='python')
    serialized = excluded.to_dict()

    assert serialized == {
        'query': {
            'bool': {
                'filter': [
                    {'bool': {'must_not': [{'match': {'title': 'python'}}]}},
                ],
            },
        },
    }
Ejemplo n.º 20
0
def handler(event, context):
    """
    Null out the ``photoId`` of every car document referencing a photo.

    :param event: mapping that must contain ``photo_id``
    :param context: invocation context (unused)
    :return: the result of ``response`` with status 200 on success, or with
        status 500 when connecting to Elasticsearch or the update fails
    """
    photo_id = event["photo_id"]
    es_endpoint = os.getenv('ELASTICSEARCH_SERVICE_ENDPOINT')

    # Connect to Elasticsearch service
    try:
        es = es_client.get_elasticsearch_client(es_endpoint)
    except Exception:
        logging.exception('Failed to connect to Elasticsearch cluster')
        connection_failure = {
            'error': 'elasticsearch-client-connection',
            'message': 'Elasticsearch service is not available',
        }
        return response(500, connection_failure)

    try:
        update = (
            UpdateByQuery(using=es)
            .index(cars_index_name)
            .filter('term', photoId=photo_id)
            .script(source='ctx._source.photoId = params.nullPhoto',
                    params={'nullPhoto': None})
        )
        update.execute()

        return response(200, {'result': 'Update seccessfull'})
    except Exception:
        logging.exception('Failed to cenzor photo')
        update_failure = {
            'error': 'car-photo-cenzor-fail',
            'message': 'Failed to cenzor requested photo',
        }
        return response(500, update_failure)
def test_ubq_to_dict():
    """Serialization covers the empty, queried, overridden and extra cases."""
    assert {} == UpdateByQuery().to_dict()

    queried = UpdateByQuery().query('match', f=42)
    match_clause = {"match": {'f': 42}}
    assert {"query": match_clause} == queried.to_dict()
    assert {"query": match_clause, "size": 10} == queried.to_dict(size=10)

    sized = UpdateByQuery(extra={"size": 5})
    assert {"size": 5} == sized.to_dict()
Ejemplo n.º 22
0
def test_reverse():
    """A dict survives the ``from_dict`` / ``to_dict`` round trip unchanged."""
    should_clauses = [
        {'term': {'category': 'meetup'}},
        {'term': {'category': 'conference'}},
    ]
    d = {
        'query': {
            'filtered': {
                'filter': {'bool': {'should': should_clauses}},
                'query': {
                    'bool': {
                        'must': [{'match': {'title': 'python'}}],
                        'must_not': [{'match': {'title': 'ruby'}}],
                        'minimum_should_match': 2,
                    }
                },
            }
        },
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        },
    }
    untouched = deepcopy(d)

    ubq = UpdateByQuery.from_dict(d)

    # The input must not have been modified by from_dict().
    assert d == untouched
    assert d == ubq.to_dict()
def test_overwrite_script():
    """Calling ``script`` again discards every key of the earlier script."""
    first = UpdateByQuery().script(
        source="ctx._source.likes += params.f",
        lang="painless",
        params={"f": 3},
    )
    expected_first = {
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        }
    }
    assert expected_first == first.to_dict()

    second = first.script(source="ctx._source.likes++")
    expected_second = {"script": {"source": "ctx._source.likes++"}}
    assert expected_second == second.to_dict()
Ejemplo n.º 24
0
def test_overwrite_script():
    """The most recent ``script`` call fully defines the serialized script."""
    increment_by_param = 'ctx._source.likes += params.f'
    increment_by_one = 'ctx._source.likes++'

    ubq = UpdateByQuery()
    ubq = ubq.script(source=increment_by_param, lang='painless', params={'f': 3})
    serialized = ubq.to_dict()
    assert {
        'script': {
            'source': 'ctx._source.likes += params.f',
            'lang': 'painless',
            'params': {'f': 3},
        }
    } == serialized

    ubq = ubq.script(source=increment_by_one)
    serialized = ubq.to_dict()
    assert {'script': {'source': 'ctx._source.likes++'}} == serialized
Ejemplo n.º 25
0
def save_choices(choice_data):
    """
    Record a student's project choices on the matching mentor documents.

    For every vote, the document of ``mentors_index`` whose ``id`` term equals
    the vote's ``proj_id`` gets the student appended to
    ``listStudentsSelected`` (created when missing) and its
    ``numStudentsSelected`` incremented.  Votes are applied one request at a
    time, so votes processed before a failing one stay applied.

    :param choice_data: JWT (HS256, verified with ``current_app.jwt_key``)
        whose decoded payload has the shape {
        "student_id": "rec03s7tmgmxVlDZu",
        "votes": [
            {"proj_id": "recjcesxayRUS9kIH", "choice": 1},
            {"proj_id": "recfRVcYvECgJ0BlY", "choice": 2},
            {"proj_id": "recicI3vLpk1uPV3X", "choice": 3},
            {"proj_id": "recJnr93NhrWClUX2", "choice": 4},
            {"proj_id": "rec4PQBsmPrR3Eeiu", "choice": 5},
        ]
    }
    :return: JSON string ``{"ok": true, "updated": <n>}`` where ``n`` is the
        total number of documents updated across all votes
    :raises Unauthorized: when the token cannot be decoded
    :raises InternalServerError: when Elasticsearch rejects an update request
    """
    try:
        data = decode(choice_data, current_app.jwt_key, algorithms=["HS256"])
    except exceptions.DecodeError as err:
        raise Unauthorized("Something is wrong with your JWT Encoding.") from err

    # The script is the same for every vote; only its parameters differ.
    add_student_script = (
        'if(!ctx._source.containsKey("listStudentsSelected")){ ctx._source.listStudentsSelected = new ArrayList();} ctx._source.listStudentsSelected.add(params.student);ctx._source.numStudentsSelected++;'
    )

    num_updated = 0
    for vote in data["votes"]:
        ubq_data = (UpdateByQuery(
            using=current_app.elasticsearch, index="mentors_index"
        ).query("term", id=vote["proj_id"]).script(
            source=add_student_script,
            params={
                "student": {
                    "student_id": data["student_id"],
                    "choice": vote["choice"],
                }
            },
        ))
        try:
            resp = ubq_data.execute().to_dict()
        except RequestError as err:
            # Keep the original error attached as the cause for debugging.
            raise InternalServerError(
                "Something went wrong with the update, please try again.") from err
        num_updated += resp["updated"]
    return json.dumps({"ok": True, "updated": num_updated})
Ejemplo n.º 26
0
def remove_from_field(doc_type_name, field_name, field_value):
    """
    Remove a value from all documents in the doc_type's index.

    Documents whose ``field_name`` contains ``field_value`` are selected with
    a ``term`` filter, and a script removes the first occurrence of the value
    from that field.  Version conflicts are skipped instead of aborting the
    request.

    :param doc_type_name: class name of the document type, looked up among
        ``get_doc_types()``
    :param field_name: name of the list field; it is inserted verbatim into
        the script source, so it must come from trusted code
    :param field_value: value to remove; passed to the script as a parameter
    :raises StopIteration: when no document type has the given class name
    """
    doc_type = next(cls for cls in get_doc_types() if cls.__name__ == doc_type_name)

    script = (
        f"if (ctx._source.{field_name}.contains(params.value)) {{"
        f"ctx._source.{field_name}.remove(ctx._source.{field_name}.indexOf(params.value))"
        f"}}"
    )

    update = UpdateByQuery(using=es7_client(), index=doc_type._index._name)
    update = update.filter("term", **{field_name: field_value})
    update = update.script(source=script, params={"value": field_value})
    # ``conflicts`` is a request-level option of update-by-query, not part of
    # the script definition, so it is set through params() and not script().
    update = update.params(conflicts="proceed")

    # refresh index to ensure search fetches all matches
    doc_type._index.refresh()

    update.execute()
def test_reverse():
    """``to_dict`` returns exactly what ``from_dict`` was given."""
    script_part = {
        'source': 'ctx._source.likes += params.f',
        'lang': 'painless',
        'params': {'f': 3},
    }
    filter_part = {
        'bool': {
            'should': [
                {'term': {'category': 'meetup'}},
                {'term': {'category': 'conference'}},
            ]
        }
    }
    query_part = {
        'bool': {
            'must': [{'match': {'title': 'python'}}],
            'must_not': [{'match': {'title': 'ruby'}}],
            'minimum_should_match': 2,
        }
    }
    original = {
        'query': {'filtered': {'filter': filter_part, 'query': query_part}},
        'script': script_part,
    }
    reference = deepcopy(original)

    ubq = UpdateByQuery.from_dict(original)

    assert original == reference
    assert original == ubq.to_dict()
Ejemplo n.º 28
0
# Script setup: load the extracted cases and the file list, then create the
# Elasticsearch client objects used by the rest of the script.

# Path of the extracted-cases file, relative to the working directory.
EXTRACTED_CASES_FILE = 'cases.json'

with open(
        EXTRACTED_CASES_FILE, 'r'
) as f:  # store your cases.json file in the same directory as this script
    # The content is parsed as a Python literal (not with the json module).
    # The loop below looks entries up by stripped file name.
    extracted_acts = ast.literal_eval(f.read())

# One entry per line; readlines() keeps the trailing newline of each line.
with open('files.txt', 'r') as f:
    files = f.readlines()

# Address of the local Elasticsearch node.
ES_HOST = {"host": "127.0.0.1", "port": 9200}
# client1 = Elasticsearch(hosts=[ES_HOST],index="test")
# client2 = Elasticsearch(hosts=[ES_HOST],index="index")
client = Elasticsearch(hosts=[ES_HOST])
s = Search(using=client)
ubq = UpdateByQuery(using=client)

# Index into ``files`` at which the loop below starts.
start = 8811
# Buffer in which the loop below assembles a curl command line.
x = ""
for i in range(start, len(files)):
    file = files[i]
    case = file.split('.')[0]
    print(i, case, '\n')
    if file.strip() not in extracted_acts or extracted_acts[file.strip()] == [
            ""
    ]:
        print('ignored')
        i += 1
        continue
    x = "curl -XPOST \"localhost:9200/test/docs/" + str(i + 1)
    x += '/_update\" -H \'Content-Type: application/json\' -d\' { "doc": {"acts" :' + str(
def test_ubq_to_dict():
    """``to_dict`` covers empty, queried, overridden and ``extra`` requests."""
    empty = UpdateByQuery()
    assert {} == empty.to_dict()

    queried = empty.query("match", f=42)
    assert {"query": {"match": {"f": 42}}} == queried.to_dict()

    sized_expected = {"query": {"match": {"f": 42}}, "size": 10}
    assert sized_expected == queried.to_dict(size=10)

    plain_extra = UpdateByQuery(extra={"size": 5})
    assert {"size": 5} == plain_extra.to_dict()

    # Query objects placed in ``extra`` are serialized as well.
    query_extra = UpdateByQuery(extra={"extra_q": Q("term", category="conference")})
    expected_extra = {"extra_q": {"term": {"category": "conference"}}}
    assert expected_extra == query_extra.to_dict()
def test_from_dict_doesnt_need_query():
    """A body containing only a script round-trips without a query."""
    script_only = {'script': {'source': 'test'}}
    ubq = UpdateByQuery.from_dict(script_only)

    assert {'script': {'source': 'test'}} == ubq.to_dict()
def test_from_dict_doesnt_need_query():
    """``from_dict`` accepts a body that has no ``query`` key."""
    serialized = UpdateByQuery.from_dict({"script": {"source": "test"}}).to_dict()

    expected = {"script": {"source": "test"}}
    assert expected == serialized
Ejemplo n.º 32
0
def test_from_dict_doesnt_need_query():
    """Only the script is serialized when the source dict has no query."""
    source_dict = {'script': {'source': 'test'}}

    restored = UpdateByQuery.from_dict(source_dict)
    result = restored.to_dict()

    assert {'script': {'source': 'test'}} == result
Ejemplo n.º 33
0
 def handle(event: EntityUpdated):
     """
     Propagate an entity's new code and name to the scholarship documents.

     Documents whose ``entity.name`` matches ``event.old_code`` get
     ``entity.code`` and ``entity.name`` overwritten.  Both new values are
     passed to the script as parameters rather than interpolated into its
     source, so quotes or backslashes in them cannot break or alter the
     script.

     :param event: update event providing ``old_code``, ``code`` and ``name``
     """
     # NOTE(review): the query matches ``entity.name`` against ``old_code``;
     # kept as in the original, confirm that this is the intended field.
     (
         UpdateByQuery(index=Scholarship.Index.name)
         .query("match", **{"entity.name": event.old_code})
         .script(
             source="ctx._source.entity.code = params.code; "
                    "ctx._source.entity.name = params.name",
             lang="painless",
             params={"code": event.code, "name": event.name},
         )
         .execute()
     )
Ejemplo n.º 34
0
def test_ubq_starts_with_no_query():
    """A freshly constructed request proxies no query yet."""
    fresh = UpdateByQuery()
    proxied_query = fresh.query._proxied

    assert proxied_query is None
Ejemplo n.º 35
0
es = Elasticsearch(hosts=[{"host": "elasticsearch", "port": 9200}])
with open("/scripts/template.json", "r") as f:
    print("update template")
    es.indices.put_template(name="tweet", body=json.load(f))

follow = []
conf = {}
with open("/conf/config.yaml") as f:
    conf = yaml.safe_load(f)
    for l in conf["list"].keys():
        for user in conf["list"][l]:
            id_str = user["id"]
            username = user["name"]
            follow.append(id_str)
            ubq = UpdateByQuery(using=es, index="tweet*")
            query = (ubq.query("match", user__id_str__keyword=id_str).script(
                source="""
                        int i = 0;
                        int found = 0;
                        if (ctx._source.list_name == null){
                            ctx._source.list_name = [];
                        }
                        for(i = 0; i < ctx._source.list_name.size(); i++){
                            if (ctx._source.list_name[i] == params.list_name){
                                found = 1;
                                break;
                            }
                        }
                        if (found == 0){
                            ctx._source.list_name.add(params.list_name);