def installPipelines():
    """Install the 'ingest_attachment' and 'add_timestamp' ingest pipelines.

    Both ``put_pipeline`` calls are idempotent: re-running simply overwrites
    the pipeline definition of the same id.
    """
    conn = get_connection()
    client = IngestClient(conn)
    # BUG FIX: 'attachment' and 'remove' were previously combined into a
    # single processor object.  Elasticsearch requires exactly one processor
    # type per object in the 'processors' array, so they are now two
    # separate entries executed in order.
    client.put_pipeline(id='ingest_attachment', body={
        'description': "Extract attachment information",
        'processors': [
            {
                "attachment": {
                    "field": "data",
                    # -1 => index the full extracted text, no truncation
                    "indexed_chars": "-1"
                }
            },
            {
                # Drop the raw base64 payload once the attachment processor
                # has extracted its content.
                "remove": {
                    "field": "data"
                }
            },
        ]
    })
    client.put_pipeline(id='add_timestamp', body={
        'description': "Adds an index_date timestamp",
        'processors': [
            {
                "set": {
                    "field": "index_date",
                    "value": "{{_ingest.timestamp}}",
                },
            },
        ]
    })
def add_attachment_pipeline(self):
    """Register the 'attachment' ingest pipeline for new deployments.

    The pipeline extracts content from the Base64/UTF-8 encoded value held
    in the ``attachment`` field.  The pipeline is only installed when
    ``self.is_new`` is truthy.
    """
    body = {
        "description": "Extract attachment information encoded in Base64 with UTF-8 charset",
        "processors": [
            {"attachment": {"field": "attachment"}},
        ],
    }
    ingest = IngestClient(self.client)
    if self.is_new:
        ingest.put_pipeline('attachment', body)
def _createPipeline(self):
    """Create the 'monthlyprocessor' ingest pipeline.

    The pipeline uses a ``date_index_name`` processor to route documents
    into a month-rounded ("M") index derived from ``@timestamp``.
    The pipeline id is stored on ``self.pipeline_id`` as a side effect.
    """
    self.pipeline_id = 'monthlyprocessor'
    body = {
        "description": "monthly date-time index naming",
        "processors": [
            {
                "date_index_name": {
                    "field": "@timestamp",
                    "index_name_prefix": "{{ _index}}-",
                    "date_rounding": "M",
                },
            },
        ],
    }
    IngestClient(self.conn).put_pipeline(id=self.pipeline_id, body=body)
def putPipelines():
    """Install the 'rename_structure_unit_description' ingest pipeline.

    The pipeline renames the field ``_source.description`` to
    ``_source.desc`` on ingested documents.
    """
    ingest = IngestClient(get_connection())
    processors = [
        {
            "rename": {
                "field": "_source.description",
                "target_field": "_source.desc",
            },
        },
    ]
    ingest.put_pipeline(
        id='rename_structure_unit_description',
        body={
            'description': "Rename field _source.description to _source.desc",
            'processors': processors,
        },
    )
def _create_ingest_pipeline(self) -> None:
    """
    Create ingest pipeline to allow extract file content and use them for
    search.
    """
    # TODO - G.M - 2019-05-31 - check if possible to set specific analyzer for
    # attachment content parameters. Goal :
    # allow ngram or lang specific indexing for "in file search"
    pipeline_body = {
        "description": "Extract attachment information",
        "processors": [{"attachment": {"field": "file"}}],
    }
    IngestClient(self.es).put_pipeline(id="attachment", body=pipeline_body)
class Pipeline:
    """
    A pipeline is a definition of a series of processors that are to be
    executed in the same order as they are declared.
    (https://www.elastic.co/guide/en/elasticsearch/reference/current/pipeline.html)

    Parameters
    ----------
    client: elasticsearch.Elasticsearch
        a elasticsearch client.
    name: str
        name for pipeline.
    pipeline_handler: :obj:`PipelineHandler`
        object that contains json object of pipeline.

    Attributes
    ----------
    ingest_client: elasticsearch.client.IngestClient
        ingest API wrapper built from the supplied client.
    """

    def __init__(self, client, name, pipeline_handler):
        self._pipeline_handler = pipeline_handler
        self._name = name
        self.ingest_client = IngestClient(client)

    def create_pipeline(self, params=None):
        """
        Create the elasticsearch pipeline with the processors specified in
        the handler's json definition.

        Parameters
        ----------
        params: dict, optional
            Currently unused; retained for backward compatibility.
        """
        # BUG FIX: the previous ``try/except Exception as e: raise (e)``
        # wrapper was a no-op that only obscured where the failure
        # originated; errors now propagate with their natural traceback.
        self.ingest_client.put_pipeline(
            self._name, load_json(self._pipeline_handler._json))
"description": "Extract attachment information from arrays", "processors": [{ "foreach": { "field": "attachments", "processor": { "attachment": { "target_field": "_ingest._value.attachment", "field": "_ingest._value.data" } } } }] } val = ex_indices.put_pipeline(id="attachment1", body=doc1) print(val) doc = { "attachments": [{ "filename": "test.pdf", "data": "JVBERi0xLjMNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNzk0NS9PIDkvRSAzNTI0L04gMS9UIDc2NTYvSCBbIDQ1MSAxMzddPj4NZW5kb2JqDSAgICAgICAgICAgICAgICAgICAgICAgDQoxMyAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDREQzkxQTE4NzVBNkQ3MDdBRUMyMDNCQjAyMUM5M0EwPjxGNkM5MkIzNjhBOEExMzQwODQ1N0ExRDM5NUEzN0VCOT5dL0luZGV4WzcgMjFdL0luZm8gNiAwIFIvTGVuZ3RoIDUyL1ByZXYgNzY1Ny9Sb290IDggMCBSL1NpemUgMjgvVHlwZS9YUmVmL1dbMSAyIDFdPj5zdHJlYW0NCmjeYmJkEGBgYmCyARIMIIKxAUgwpwIJNkcg8eUYAxMjwzSQLAMjucR/xp1fAAIMAEykBvANCmVuZHN0cmVhbQ1lbmRvYmoNc3RhcnR4cmVmDQowDQolJUVPRg0KICAgICAgICANCjI3IDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9JIDY5L0xlbmd0aCA1OC9TIDM4Pj5zdHJlYW0NCmjeYmBgYGFgYPzPAATcNgyogJEBJMvRgCzGAsUMDA0M3Azc0x50JoA4zAwMWgIQLYwsAAEGAL/iBRkNCmVuZHN0cmVhbQ1lbmRvYmoNOCAwIG9iag08PC9NZXRhZGF0YSAxIDAgUi9QYWdlcyA1IDAgUi9UeXBlL0NhdGFsb2c+Pg1lbmRvYmoNOSAwIG9iag08PC9Db250ZW50cyAxMSAwIFIvQ3JvcEJveFswIDAgNTk1IDg0Ml0vTWVkaWFCb3hbMCAwIDU5NSA4NDJdL1BhcmVudCA1IDAgUi9SZXNvdXJjZXMgMTQgMCBSL1JvdGF0ZSAwL1R5cGUvUGFnZT4+DWVuZG9iag0xMCAwIG9iag08PC9GaWx0ZXIvRmxhdGVEZWNvZGUvRmlyc3QgOTQvTGVuZ3RoIDc3My9OIDEzL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjevFRtb9owEP4r/gPgl9hxIlVI0I6u0lqhJls/RPmQgguRQoISV6P/fncJLoG1K6XSiMz55e58vue545IwwhXhnibcJyKAlSaeCAgPiOeDCImUighGVMiI4CQUoCYIZ1oS4YGt5kRIsGIhEeAokLAGFcYkubigl1VR1dEmmxtcNAovY+R+NKLftvY6spnFg+uI4/XdwbQqLexNBcYAWzSOBQbQTSXe3k19vLibBnhnZz6rq3lkbEJnV1Mam61NR6OEXmbF/fUEr8rW6ywRQwE/iPRQpvQ2s3W+TdhQcnQ+FBwdDxkPPRCe0rjSXEFe2JDzUKAImEIdjZENQ8VUSh9WuTWzKi9t0m0ReOGQBSFEk0IY0Zg8ZUVjaHSLpoLG9/RmYUqb2xcav2zMPj+jEehf5U9Ppjbl3DQJp4/PRWFsulMs59UiL5et3iRrDCaQRi/rx6p4PURYMVXR86NFI7TkNK5+ljkoGMJ3ScUztG+djZs5RERCpi
B/m+8mX64sYfTKdPsDwTmdFtmyAca0VpNJtU0GPtBn4GkkgQfMYDJI29O7bG3ouM6zYjCpisVtTG9sVuTzcbksDPiNrFn/Aip6+zDwqjrf2Ko+fN2BF/dG+pCX47LJX9fTvG7s5SqrXXx7d0hsfPCPbKfBub9PTv1sYpel1hBcL+yqSYRGSn7ta2nyKn3O39Dxff2hH6X81rovuxMXpZPuDi8IWy3P89I+wEHI3wPYdwDLHsDKR4CZBoCxUzCmewDH+do0d+b3fbXOyln0DsrsY4z/dnQW0IIfAa3lKUCrw2RDjWPa2tGmVu3/T4UcQe1me6iOAXXQO8hCKd/QlLCr2KHEyHCOo08ADcPt49i9A6ggeie7uBgj/+vTPku/1GV8BSQUypHQ08dd5nzqOfPzCOcdEg40Tmosny3JMOiXpNRdSXLBfMyGeL8k277ZZeYoRQOuPtOF/+n3vNypo2IV/Ixi3X+nFuipPfeDjsxccbr/rqgP+zHu9IoRCtEVo4tiV9JAiD8CDAA+0IrxDQplbmRzdHJlYW0NZW5kb2JqDTExIDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9MZW5ndGggMTUzMD4+c3RyZWFtDQpIibRXS2/jNhBGr/4Vc1uqiBW9H8d0tynQ02IroIduD7LEJCpk0RDppPlT/Y2dB2l7nS0KLFoEUPgacuabmW/GP3Sb267LIIXuYZMWcVJAgn8yytI8rqukgrqscZ7k0O03t+9tCYPlYwnYYXP70y8pPNrNNomTJKugGzY0qhroXja/qbsoTeJMjdG2jlNldhqibUpD3GjiWg3RNlNrtK3iCnd7Bx8/3MP9RAuNmrWNfu9+Jh0Lr2MmCmbQtHGbkXJZG+eZKMc6JK3XIaMR6zDiu3/BR7O6fjdr+GBQhyRu1XDc68XBfVTGucJFWlv3uJmjgqjLZ4Xa8ObnCCZLqieqh+MyPevV9rMsPEwzWZXhyKx7FONV9xRGh5WMb5W2en32L+sow2+4cZ7ZzAS2aZyW0H1gCJPGG9K2mRhiHqIcYYGI79dRgaDxRNbN4uzN5TxK8LvymKyKC9WzjHPTEm1b9MsjuadRN3ySRQc+IaKzOYq05S0RXkZ4lFWZH54mkbFRosDIvV5RL8GXvcpTYrLFm0XKWzEamR5JUdJUX4i6G5AXdbQtcc9r3dMs9waOorGIWQuIFWHafe+jogiRSSMCEwGE/nCYp6F3k1mgR8MOc+/IiXC0rEam9AjOwLBqCdEe3yqU0zC5OPgsi3PvspTC8BRxjJkEUCvYTh7HRWYjX1rypaWaxXMSQg8Somgc6NkfG/iYW80yDYQXQ5XhEsXwOFm3TrujmGJRPzAYpIPZawsUK1cBJqDUJ1BqUfywGsyQvQUU3Jtl5hda8h1mmQK9sFqYtua4OM2BXRNGL5N7Ik0HVs9LDcCpYZ96MgBTC4M+V9PyGNFlgt/tvWcfAbJhJFkrUkh9F3V/UPpX/lBcVJj+eAYBlZ3GE4NwV0id0htWtSXfc7e8mkXfoJNfX540elOEPaugEV6YYUm9cJ0KKDCgx8xBI7BIT9G2wUAjr2aKDYzhbiYqyBPGSZmjxPiiCR4OIZ4HAqHAE+JA/DCm/YxihoJOhfmw+oUeccMkYLy2rCu5sQjGpj6006SpROFPmrXr+TtGkk40XjE7ChVzpH3SA69NxHuNOkxyZOHjTiIVk4gEZExRdL7E8wwNEQOPBk8N3yCn9nK5aOJkYsFiVMrK5AcYcBcqL4Rxpd5FmIJVEEMPyPKlnvClBhZ2+vKiIx+yXj0yYIu1jbjoq+nwhiNGs7zDYEXw4akX7iYoiQPgzB+eGij1LDLHP1EGCZzTtqK0tVdJgPqU35gHxdfyQEJjG4ZkEhFSTYx7jVyotD6hsAUoLy4qzxeVclE/v/SvXByR+JEF4LBOSESDL6ZoiVpXzTNZc/PrVTXHRGov8i7JTvj7ggfMy1RbUUUmoca/MwkTUQXjxV
E/iyPEP/U1vZDfi+K/xDb0GWndppfQpgRtjnQ3cTGqEdqe/xOZIgwvyIYp4fEaZdQKEHoogwSO1efLrWufUOvwluXkcS6NtfqzH97inF3hHDRvQ4dEFYNJh6OWbOi5QXF6pNIr7YtsEN5hex1n3yz5fobKLtYu7kOseXBkKwmtTL2jMBgKNPmZwr5MvSqkHvLt2gc3F/ysb3awNGdpiAes9Q7rlVAakfJlG0QlXQTZBmx/qFkJzQxnJ9WkSkmtXoyD2VgspkdNKRy6gbMtLIG2SNvmDbpq29LsnCo+jJ8xDZgQM/Y2Zh3G9bRgWnCiZGp/QL5CNtxN8+SIiNX/yQzbs5oUvkHLDvnpQfyPSQR3g4xWbss/6X4MLdFKvbA/1zN+5BJ2CJVGgm40L8ts+pG7KoksrKG7U+ELr2D8ZESPQfTUxiCJ7i5Z+hwqeXMR9UQOFE90QYW6YdtEs7CqsSX9dyC/mV1zgbBoGt8+vTfsSYz4gb9OflOcOsEaSfFUOHNPvumpvabxKnksG2D3sjr7kyvLYSmRZSqCPKXKGIQm/0NGjlKnzaPBX3n9tL9p9D6Tm2QR3fdVF4SI4ah9pHAFjl9EXUYghV0eY680/EukCF0CF2hl3QXtEelReBHnc6uh4Ff67sSBP3abvwcArRiH3QoNCmVuZHN0cmVhbQ1lbmRvYmoNMTIgMCBvYmoNPDwvRmlsdGVyL0ZsYXRlRGVjb2RlL0xlbmd0aCAyMDg+PnN0cmVhbQ0KSIlUkL0OwjAMhPc+hUcQQ9rOVRdYOvAjCuxp4laRiBO56dC3JykFxBBL9uXTnS32zaEhE0Bc2KkWA/SGNOPoJlYIHQ6GoChBGxXWbqnKSg8iwu08BrQN9Q6qKhPXKI6BZ9i0s+3cc5dvQZxZIxsaYHMr7o84aCfvn2iRAuRQ16Cxz8T+KP1JWozyii7zYjV0GkcvFbKkAaHKi/pdkPS/9iG6/t3+vlZlXpZ1FomPluC0yddbTcwx1rLukihlMITfi3jnk2V62UuAAQBDyGk/Cg0KZW5kc3RyZWFtDWVuZG9iag0xIDAgb2JqDTw8L0xlbmd0aCAzNjU2L1N1YnR5cGUvWE1ML1R5cGUvTWV0YWRhdGE+PnN0cmVhbQ0KPD94cGFja2V0IGJlZ2luPSLvu78iIGlkPSJXNU0wTXBDZWhpSHpyZVN6TlRjemtjOWQiPz4KPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iQWRvYmUgWE1QIENvcmUgNC4yLjEtYzA0MyA1Mi4zNzI3MjgsIDIwMDkvMDEvMTgtMTU6MDg6MDQgICAgICAgICI+CiAgIDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+CiAgICAgIDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiCiAgICAgICAgICAgIHhtbG5zOmRjPSJodHRwOi8vcHVybC5vcmcvZGMvZWxlbWVudHMvMS4xLyI+CiAgICAgICAgIDxkYzpmb3JtYXQ+YXBwbGljYXRpb24vcGRmPC9kYzpmb3JtYXQ+CiAgICAgICAgIDxkYzpjcmVhdG9yPgogICAgICAgICAgICA8cmRmOlNlcT4KICAgICAgICAgICAgICAgPHJkZjpsaT5jZGFpbHk8L3JkZjpsaT4KICAgICAgICAgICAgPC9yZGY6U2VxPgogICAgICAgICA8L2RjOmNyZWF0b3I+CiAgICAgICAgIDxkYzp0aXRsZT4KICAgICAgICAgICAgPHJkZjpBbHQ+CiAgICAgICAgICAgICAgIDxyZGY6bGkgeG1sOmxhbmc9IngtZGVmYXVsdCI+VGhpcyBpcyBhIHRlc3QgUERGIGZpbGU8L3JkZjpsaT4KICAgICAgICAgICAgPC9yZGY6QWx0Pg
ogICAgICAgICA8L2RjOnRpdGxlPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgICAgPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIKICAgICAgICAgICAgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIj4KICAgICAgICAgPHhtcDpDcmVhdGVEYXRlPjIwMDAtMDYtMjlUMTA6MjE6MDgrMTE6MDA8L3htcDpDcmVhdGVEYXRlPgogICAgICAgICA8eG1wOkNyZWF0b3JUb29sPk1pY3Jvc29mdCBXb3JkIDguMDwveG1wOkNyZWF0b3JUb29sPgogICAgICAgICA8eG1wOk1vZGlmeURhdGU+MjAxMy0xMC0yOFQxNToyNDoxMy0wNDowMDwveG1wOk1vZGlmeURhdGU+CiAgICAgICAgIDx4bXA6TWV0YWRhdGFEYXRlPjIwMTMtMTAtMjhUMTU6MjQ6MTMtMDQ6MDA8L3htcDpNZXRhZGF0YURhdGU+CiAgICAgIDwvcmRmOkRlc2NyaXB0aW9uPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91dD0iIgogICAgICAgICAgICB4bWxuczpwZGY9Imh0dHA6Ly9ucy5hZG9iZS5jb20vcGRmLzEuMy8iPgogICAgICAgICA8cGRmOlByb2R1Y2VyPkFjcm9iYXQgRGlzdGlsbGVyIDQuMCBmb3IgV2luZG93czwvcGRmOlByb2R1Y2VyPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgICAgPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIKICAgICAgICAgICAgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iPgogICAgICAgICA8eG1wTU06RG9jdW1lbnRJRD51dWlkOjA4MDVlMjIxLTgwYTgtNDU5ZS1hNTIyLTYzNWVkNWMxZTJlNjwveG1wTU06RG9jdW1lbnRJRD4KICAgICAgICAgPHhtcE1NOkluc3RhbmNlSUQ+dXVpZDo2MmQ2YWU2ZC00M2M0LTQ3MmQtOWIyOC03YzRhZGQ4ZjllNDY8L3htcE1NOkluc3RhbmNlSUQ+CiAgICAgIDwvcmRmOkRlc2NyaXB0aW9uPgogICA8L3JkZjpSREY+CjwveDp4bXBtZXRhPgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC
AgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgCjw/eHBhY2tldC
BlbmQ9InciPz4NCmVuZHN0cmVhbQ1lbmRvYmoNMiAwIG9iag08PC9GaWx0ZXIvRmxhdGVEZWNvZGUvRmlyc3QgNC9MZW5ndGggNDgvTiAxL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjeMlUwULCx0XfOL80rUTDU985MKY62BIoFxeqHVBak6gckpqcW29kBBBgA1ncLgA0KZW5kc3RyZWFtDWVuZG9iag0zIDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9GaXJzdCA0L0xlbmd0aCAxNjcvTiAxL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjePMvBCsIwEEXRX5mdDaKdxCpVSqFY3AkuBNexSelA6EAyRfx7A4qPu3znAAhNU3aLTByLwVkKb1Weo7dCPPdWfNGfDOYdzFGj0VivtV4hrn6vrK40RE48Cjw4Oqi3qMoruz/WuwxrvTeV3m2w+uJbZLcMPhZdxk8r0FMSCsFHqLYII0d40Oz4lVR5Jwm+uE+UIGdBfBK49RcYKXjVth8BBgBnZztkDQplbmRzdHJlYW0NZW5kb2JqDTQgMCBvYmoNPDwvRGVjb2RlUGFybXM8PC9Db2x1bW5zIDMvUHJlZGljdG9yIDEyPj4vRmlsdGVyL0ZsYXRlRGVjb2RlL0lEWzw0REM5MUExODc1QTZENzA3QUVDMjAzQkIwMjFDOTNBMD48RjZDOTJCMzY4QThBMTM0MDg0NTdBMUQzOTVBMzdFQjk+XS9JbmZvIDYgMCBSL0xlbmd0aCAzNy9Sb290IDggMCBSL1NpemUgNy9UeXBlL1hSZWYvV1sxIDIgMF0+PnN0cmVhbQ0KaN5iYmBgYGLkPcLEwD+ViYGhh4mBkYWJ8bEkkM0IEGAAKlkDFA0KZW5kc3RyZWFtDWVuZG9iag1zdGFydHhyZWYNCjExNg0KJSVFT0YNCg==" }] } res = es.index(index="test-index0", doc_type='tweet', id=1, body=doc, pipeline='attachment1')
class IngestConnector:
    """Thin wrapper around an Elasticsearch attachment ingest pipeline.

    Documents are indexed into ``<pipeline_id>_index`` with their textual
    content base64-encoded into ``field``; the pipeline's attachment
    processor extracts searchable text into ``attachment.content``.
    """

    def __init__(
            self,
            pipeline_id: str = "pdf_content",
            field: str = "data",
            pipeline_description: str = "Extracting info from pdf content"):
        self.pipeline_id: str = pipeline_id
        # The backing index name is derived from the pipeline id.
        self.index_name: str = pipeline_id + "_index"
        self.field: str = field
        self.pipeline_description: str = pipeline_description
        self.ingest_client = IngestClient(current_app.elasticsearch)

    def create_pipeline(self):
        """Create (or overwrite) the attachment ingest pipeline."""
        self.ingest_client.put_pipeline(id=self.pipeline_id, body={
            'description': self.pipeline_description,
            'processors': [{
                "attachment": {
                    "field": self.field
                }
            }]
        })

    def delete_pipeline(self):
        """Remove the pipeline from the cluster."""
        self.ingest_client.delete_pipeline(id=self.pipeline_id)

    def get_pipeline(self):
        """Return the stored pipeline definition."""
        return self.ingest_client.get_pipeline(id=self.pipeline_id)

    def add_to_index(self, id_: int, content: str, content_page: int,
                     content_paragraph: int):
        """Index *content* together with its page/paragraph coordinates.

        The text is base64-encoded so the attachment processor can decode
        and parse it during ingest.
        """
        current_app.elasticsearch.index(
            index=self.index_name,
            id=id_,
            pipeline=self.pipeline_id,
            body={
                self.field:
                    base64.b64encode(content.encode("utf-8")).decode("utf-8"),
                "content_page": content_page,
                "content_paragraph": content_paragraph,
            })

    def remove_from_index(self, id_: int):
        """Delete the document with *id_* from the backing index."""
        current_app.elasticsearch.delete(index=self.index_name, id=id_)

    def api_search(self, query: str):
        """Run a match query against the extracted attachment content."""
        return current_app.elasticsearch.search(
            index=self.index_name,
            body={"query": {
                "match": {
                    "attachment.content": query
                }
            }})

    def search(self, query: str):
        """Return the best-matching KnowledgePdfContent row, or None.

        The relevance ordering returned by Elasticsearch is preserved when
        loading rows from the database via a SQL CASE expression.
        """
        # Renamed local (was ``search``) so it no longer shadows this method.
        result = self.api_search(query)
        ids = [int(hit['_id']) for hit in result['hits']['hits']]
        if not ids:  # no matches at all
            return None
        # Pair each id with its relevance rank; enumerate replaces the
        # previous index-based ``for i in range(len(ids))`` loop.
        when = [(doc_id, rank) for rank, doc_id in enumerate(ids)]
        res = KnowledgePdfContent.query.filter(
            KnowledgePdfContent.id.in_(ids)).order_by(
                db.case(when, value=KnowledgePdfContent.id)).all()
        return res[0] if res else None
hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']]) from elasticsearch import Elasticsearch es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts']) from elasticsearch.client import IngestClient c = IngestClient(es) try: c.get_pipeline('geoip') except elasticsearch.exceptions.NotFoundError: c.put_pipeline('geoip', body='''{ "description" : "Add geoip info", "processors" : [ { "geoip" : { "field" : "ip" } } ] }''') class GeoIp(InnerDoc): continent_name = Keyword() country_iso_code = Keyword() country_name = Keyword() location = GeoPoint() class UserAgentBrowser(InnerDoc):
def setup_dga_model(ctx, model_tag, repo, model_dir, overwrite):
    """Upload ML DGA model and dependencies and enrich DNS data."""
    import io
    import requests
    import shutil
    import zipfile

    es_client: Elasticsearch = ctx.obj['es']
    client_info = es_client.info()

    # The ML features used below are license-gated; fail fast otherwise.
    license_client = LicenseClient(es_client)
    if license_client.get()['license']['type'].lower() not in ('platinum', 'enterprise'):
        client_error(
            'You must have a platinum or enterprise subscription in order to use these ML features'
        )

    # download files if necessary
    if not model_dir:
        if not model_tag:
            client_error(
                'model-tag or model-dir required to download model files')

        click.echo(f'Downloading artifact: {model_tag}')

        # Fetch the GitHub release for the tag and locate its single
        # ML-DGA*.zip asset; anything other than exactly one is an error.
        release_url = f'https://api.github.com/repos/{repo}/releases/tags/{model_tag}'
        release = requests.get(release_url)
        release.raise_for_status()
        assets = [
            a for a in release.json()['assets']
            if a['name'].startswith('ML-DGA') and a['name'].endswith('.zip')
        ]
        if len(assets) != 1:
            client_error(
                f'Malformed release: expected 1 match ML-DGA zip, found: {len(assets)}!'
            )

        zipped_url = assets[0]['browser_download_url']
        zipped = requests.get(zipped_url)
        z = zipfile.ZipFile(io.BytesIO(zipped.content))

        # Extract into ML-models/DGA/<model_tag>, replacing any previous
        # extraction of the same tag.
        dga_dir = get_path('ML-models', 'DGA')
        model_dir = os.path.join(dga_dir, model_tag)
        os.makedirs(dga_dir, exist_ok=True)
        shutil.rmtree(model_dir, ignore_errors=True)
        z.extractall(dga_dir)
        click.echo(f'files saved to {model_dir}')

        # read files as needed
        z.close()

    def get_model_filename(pattern):
        # Resolve exactly one file in model_dir matching the glob pattern.
        paths = list(Path(model_dir).glob(pattern))
        if not paths:
            client_error(
                f'{model_dir} missing files matching the pattern: {pattern}')
        if len(paths) > 1:
            client_error(
                f'{model_dir} contains multiple files matching the pattern: {pattern}'
            )
        return paths[0]

    @contextmanager
    def open_model_file(name):
        # Yield the parsed JSON for the logical file `name`, where
        # expected_ml_dga_patterns maps names to filename glob patterns.
        pattern = expected_ml_dga_patterns[name]
        with open(get_model_filename(pattern), 'r') as f:
            yield json.load(f)

    # Derive the model id from the model filename, e.g.
    # "dga_<id>_model.json" -> "dga_<id>" (strips the trailing "_model.json").
    model_id, _ = os.path.basename(
        get_model_filename('dga_*_model.json')).rsplit('_', maxsplit=1)

    click.echo(
        f'Setting up DGA model: "{model_id}" on {client_info["name"]} ({client_info["version"]["number"]})'
    )

    # upload model
    ml_client = MlClient(es_client)
    ingest_client = IngestClient(es_client)

    # If the model already exists, either delete-and-replace (--overwrite)
    # or abort with guidance.
    existing_models = ml_client.get_trained_models()
    if model_id in [
        m['model_id']
        for m in existing_models.get('trained_model_configs', [])
    ]:
        if overwrite:
            ctx.invoke(remove_dga_model,
                       model_id=model_id,
                       es_client=es_client,
                       ml_client=ml_client,
                       ingest_client=ingest_client,
                       force=True)
        else:
            client_error(
                f'Model: {model_id} already exists on stack! Try --overwrite to force the upload'
            )

    click.secho('[+] Uploading model (may take a while)')

    with open_model_file('model') as model_file:
        try:
            ml_client.put_trained_model(model_id=model_id, body=model_file)
        except elasticsearch.ConnectionTimeout:
            msg = 'Connection timeout, try increasing timeout using `es --timeout <secs> experimental setup_dga_model`.'
            client_error(msg)

    # install scripts
    click.secho('[+] Uploading painless scripts')
    with open_model_file('dga_ngrams_create') as painless_install:
        es_client.put_script(id='dga_ngrams_create', body=painless_install)
        # f'{model_id}_dga_ngrams_create'
    with open_model_file('dga_ngrams_transform_delete') as painless_delete:
        es_client.put_script(id='dga_ngrams_transform_delete',
                             body=painless_delete)
        # f'{model_id}_dga_ngrams_transform_delete'

    # Install ingest pipelines
    click.secho('[+] Uploading pipelines')

    def _build_es_script_error(err, pipeline_file):
        # Format an Elasticsearch script_exception into a readable,
        # red-colored multi-line message (type/reason, position, stack).
        error = err.info['error']
        cause = error['caused_by']
        error_msg = [
            f'Script error while uploading {pipeline_file}: {cause["type"]} - {cause["reason"]}',
            ' '.join(f'{k}: {v}' for k, v in error['position'].items()),
            '\n'.join(error['script_stack'])
        ]
        return click.style('\n'.join(error_msg), fg='red')

    with open_model_file('dns_enrich_pipeline') as ingest_pipeline1:
        try:
            ingest_client.put_pipeline(id='dns_enrich_pipeline',
                                       body=ingest_pipeline1)
        except elasticsearch.RequestError as e:
            # Only script compilation failures get the pretty report;
            # anything else propagates unchanged.
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline1'),
                             e,
                             ctx=ctx)
            else:
                raise
    with open_model_file(
            'dns_dga_inference_enrich_pipeline') as ingest_pipeline2:
        try:
            ingest_client.put_pipeline(id='dns_dga_inference_enrich_pipeline',
                                       body=ingest_pipeline2)
        except elasticsearch.RequestError as e:
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline2'),
                             e,
                             ctx=ctx)
            else:
                raise

    # Final operator guidance (manual follow-up steps).
    click.echo('Ensure that you have updated your packetbeat.yml config file.')
    click.echo(' - reference: ML_DGA.md #2-update-packetbeat-configuration')
    click.echo(
        'Associated rules and jobs can be found under ML-experimental-detections releases in the repo'
    )
    click.echo('To upload rules, run: kibana upload-rule <ml-rule.toml>')
    click.echo(
        'To upload ML jobs, run: es experimental upload-ml-job <ml-job.json>')
def createElasticSearchIngestPipeline(self):
    """Create the full hierarchy of language-detection ingest pipelines.

    Five leaf pipelines are built via ``self.constructLanguagePipeline``
    (one per text field), then three composite pipelines are installed
    that chain them together; 'language_detector' is the top-level entry
    point that runs the whole set.
    """
    esIngestClient = IngestClient(self.client)

    # Leaf pipelines.  NOTE(review): the `prefix`/`field` values are
    # passed through to constructLanguagePipeline, which is defined
    # elsewhere — '_ingest._value.' prefixes presumably target foreach
    # iteration values; confirm against that helper.
    self.constructLanguagePipeline(
        esIngestClient,
        'title_language_detector',
        'Work title language detection',
        field='title.'
    )

    self.constructLanguagePipeline(
        esIngestClient,
        'alt_title_language_detector',
        'Work alt_title language detection',
        prefix='_ingest._value.'
    )

    self.constructLanguagePipeline(
        esIngestClient,
        'edition_title_language_detector',
        'Edition title language detection',
        prefix='_ingest._value.',
        field='title.'
    )

    self.constructLanguagePipeline(
        esIngestClient,
        'edition_sub_title_language_detector',
        'Edition subtitle language detection',
        prefix='_ingest._value.',
        field='sub_title.'
    )

    self.constructLanguagePipeline(
        esIngestClient,
        'subject_heading_language_detector',
        'Subject heading language detection',
        prefix='_ingest._value.',
        field='heading.'
    )

    # Composite: run alt_title detection for each element of alt_titles.
    esIngestClient.put_pipeline(
        id='foreach_alt_title_language_detector',
        body={
            'description': 'loop for parsing alt_titles',
            'processors': [
                {
                    'foreach': {
                        'field': 'alt_titles',
                        'processor': {
                            'pipeline': {
                                'name': 'alt_title_language_detector',
                            }
                        }
                    }
                }
            ]
        }
    )

    # Composite: run both edition-level detectors; failures are ignored so
    # a missing field does not abort the whole document.
    esIngestClient.put_pipeline(
        id='edition_language_detector',
        body={
            'description': 'loop for parsing edition fields',
            'processors': [
                {
                    'pipeline': {
                        'name': 'edition_title_language_detector',
                        'ignore_failure': True
                    }
                },
                {
                    'pipeline': {
                        'name': 'edition_sub_title_language_detector',
                        'ignore_failure': True
                    }
                }
            ]
        }
    )

    # Top-level pipeline combining everything: title, alt_titles loop,
    # per-edition loop, and per-subject loop.
    esIngestClient.put_pipeline(
        id='language_detector',
        body={
            'description': 'Full language processing',
            'processors': [
                {
                    'pipeline': {
                        'name': 'title_language_detector',
                        'ignore_failure': True
                    }
                },
                {
                    'pipeline': {
                        'name': 'foreach_alt_title_language_detector',
                        'ignore_failure': True
                    }
                },
                {
                    'foreach': {
                        'field': 'editions',
                        'processor': {
                            'pipeline': {
                                'name': 'edition_language_detector',
                                'ignore_failure': True
                            }
                        }
                    }
                },
                {
                    'foreach': {
                        'field': 'subjects',
                        # Documents without subjects are allowed through.
                        'ignore_missing': True,
                        'processor': {
                            'pipeline': {
                                'name': 'subject_heading_language_detector',
                                'ignore_failure': True
                            }
                        }
                    }
                }
            ]
        }
    )
"pl": 1.9, "pw": 0.4 } }] } # simulate ingest pipeline IngestClient.simulate(es, body) # In[ ]: # store the pipeline for use in prod pipeline_name = model_id + '_ingest_pipeline' body = {'description': 'predict flower type', 'processors': processors} IngestClient.put_pipeline(es, id=pipeline_name, body=body) # In[ ]: # verify pipeline IngestClient.get_pipeline(es, pipeline_name) # In[ ]: # create index template with our new pipeline as the default pipeline settings = { "index_patterns": ["flower_measurements-*"], "settings": { "default_pipeline": "jeffs-rfc-flower-type_ingest_pipeline" }
def make_pipelines():
    """Load pipeline definitions from pipelines.json and install each one.

    Reads ``<pipelines_dir>/pipelines.json`` (a mapping of pipeline id to
    pipeline body) and issues one ``put_pipeline`` per entry, printing the
    Elasticsearch acknowledgement for each.
    """
    with open(os.path.join(pipelines_dir, "pipelines.json")) as fp:
        pipelines = json.load(fp)
    ing_client = IngestClient(client)
    # Iterate key/value pairs directly instead of looping over .keys()
    # and re-indexing the dict for every entry.
    for key, body in pipelines.items():
        print("Creating {0} created {1}".format(
            key, ing_client.put_pipeline(key, body)))