def get_alarm_relevant_events(objs, **kwargs):
    from_es = kwargs["act_from_es"]
    to_es = kwargs["act_to_es"]
    events = []
    for obj in objs:
        alarm = obj["_source"]
        start_date = datetime.datetime.fromtimestamp(int(alarm["start_time"]) / 1000)
        end_date = datetime.datetime.fromtimestamp(int(alarm["end_time"]) / 1000)
        # one daily "event_YYYYMMDD" index for every day the alarm spans
        indices = [("event_" + (start_date + datetime.timedelta(x)).strftime("%Y%m%d"))
                   for x in range(0, (end_date - start_date).days + 1)]
        query_str = {"query": {"terms": {"_id": alarm["event_id"]}}}
        s_res = from_es.search(index=indices, body=query_str, allow_no_indices=True, size=2000)
        events.extend(s_res["hits"]["hits"])
        # flush in batches of 3000; write only the slice that is then dropped,
        # so the remaining buffered events are not indexed twice
        while len(events) > 3000:
            helpers.bulk(client=to_es, actions=events[:3000], chunk_size=3000)
            logging.debug("Write 3000 items into dest ES.")
            del events[:3000]
    # write the remaining items into the destination ES.
    helpers.bulk(client=to_es, actions=events, chunk_size=3000)
    logging.debug("Write the remaining " + str(len(events)) + " items into dest ES.")
    return
def do_search_and_write(from_es, to_es, indices, query, step=5000, **kwargs):
    action = None
    if "action" in kwargs:
        action = kwargs["action"]
    # act_para = None
    # if "act_para" in kwargs:
    #     act_para = kwargs["act_para"]
    if (to_es is None) or (from_es is None):
        logging.log(logging.ERROR, "The source or destination ES has not been correctly specified.")
        return
    else:
        search_res = helpers.scan(client=from_es, query=query, index=indices, scroll="6m", size=step)
        bat = []
        cnt = 0
        for i in search_res:
            bat.append(i)
            cnt += 1
            if (cnt % 30000) == 0:
                if action is not None:
                    action(objs=bat, **kwargs)
                helpers.bulk(client=to_es, actions=bat, chunk_size=step, max_chunk_bytes=209715200)
                del bat[:]
                logging.info("Write 30000 items to dst ES.")
        # insert the rest of the data into dst_es
        if action is not None:
            action(objs=bat, **kwargs)
        helpers.bulk(client=to_es, actions=bat, chunk_size=step, max_chunk_bytes=204857600)
    return
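# Hypothetical usage of the two helpers above (not part of the original code):
# do_search_and_write() scans matching alarms out of the source cluster and, via
# the "action" callback, get_alarm_relevant_events() copies the events each alarm
# references before the alarms themselves are bulk-written. The host names, the
# "alarm_*" index pattern and the match_all query are illustrative assumptions.
from elasticsearch import Elasticsearch

src_es = Elasticsearch(hosts=["src-es:9200"])
dst_es = Elasticsearch(hosts=["dst-es:9200"])
do_search_and_write(from_es=src_es, to_es=dst_es,
                    indices="alarm_*",
                    query={"query": {"match_all": {}}},
                    action=get_alarm_relevant_events,
                    act_from_es=src_es, act_to_es=dst_es)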
def store_results(self, data):
    """Store results back to ES."""
    index_out = self._prep_index_name(self.config.storage.ES_TARGET_INDEX)
    actions = [{
        "_index": index_out,
        "_type": "log",
        "_source": item
    } for item in data]
    helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
def bulk_update_cgm(cgms, actions=None, op='update', index=None):
    index = index or INDEX
    if not actions and cgms:
        actions = ({
            '_op_type': op,
            '_index': index,
            '_id': cgm._id,
            '_type': 'collectionSubmission',
            'doc': serialize_cgm(cgm),
            'doc_as_upsert': True,
        } for cgm in cgms)
    try:
        helpers.bulk(client(), actions or [], refresh=True, raise_on_error=False)
    except helpers.BulkIndexError as e:
        raise exceptions.BulkUpdateError(e.errors)
def set_data(es, input_file, index_name=settings.es_index, doc_type_name=settings.es_type):
    # read in
    with open(input_file, 'r') as fp:
        line_list = fp.readlines()
    # make ACTIONS
    ACTIONS = []
    for line in line_list:
        fields = json.loads(line)
        action = {
            "_index": index_name,
            "_type": doc_type_name,
            "_source": {
                "id": fields["id"],
                "work_id": fields["work_id"],
                "title": fields["title"],
                "creator": fields["creator"],
                "creator_variant": fields["creator_variant"],
                "vol_id": fields["vol_id"],
                "category": fields["category"],
                "sutra_body": fields["sutra_body"],
            }
        }
        ACTIONS.append(action)
    # batch proc
    success, _ = bulk(es, ACTIONS, index=index_name, raise_on_error=True)
    print('Performed %d actions' % success)
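# Hypothetical call to set_data() above (not in the original source): the input
# file is assumed to be JSON-lines, one record per line, carrying the fields the
# action template expects. Host and file name are illustrative.
from elasticsearch import Elasticsearch

es = Elasticsearch(hosts=["localhost:9200"])
set_data(es, "sutras.jsonl")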
def sql_migrate(index, sql, max_id, increment, es_args=None, **kwargs):
    """
    Run provided SQL and send output to elastic.

    :param str index: Elastic index to update (formatted into `sql`)
    :param str sql: SQL to format and run. See __init__.py in this module
    :param int max_id: Last known object id. Indicates when to stop paging
    :param int increment: Page size
    :param dict es_args: Dict or None, to pass to `helpers.bulk`
    :kwargs: Additional format arguments for `sql` arg
    :return int: Number of migrated objects
    """
    if es_args is None:
        es_args = {}
    total_pages = int(ceil(max_id / float(increment)))
    total_objs = 0
    page_start = 0
    page_end = 0
    page = 0
    while page_end <= (max_id + increment):
        page += 1
        page_end += increment
        if page <= total_pages:
            logger.info('Updating page {} / {}'.format(page_end / increment, total_pages))
        else:
            # An extra page is included to cover the edge case where:
            #   max_id == (total_pages * increment) - 1
            # and two additional objects are created during runtime.
            logger.info('Cleaning up...')
        with connection.cursor() as cursor:
            cursor.execute(
                sql.format(index=index, page_start=page_start, page_end=page_end, **kwargs))
            ser_objs = cursor.fetchone()[0]
            if ser_objs:
                total_objs += len(ser_objs)
                helpers.bulk(client(), ser_objs, **es_args)
        page_start = page_end
    return total_objs
def bulk_update_nodes(serialize, nodes, index=None, category=None):
    """Update the given list of nodes in the search index.

    :param function serialize: Callable mapping a Node to its search document (dict)
    :param Node[] nodes: Projects, components, registrations, or preprints
    :param str index: Index of the nodes
    :return: Result of `helpers.bulk`, or None if there was nothing to update
    """
    index = index or INDEX
    actions = []
    for node in nodes:
        serialized = serialize(node)
        if serialized:
            actions.append({
                '_op_type': 'update',
                '_index': index,
                '_id': node._id,
                '_type': category or get_doctype_from_node(node),
                'doc': serialized,
                'doc_as_upsert': True,
            })
    if actions:
        return helpers.bulk(client(), actions)
"response": "/accept", "dst_port": 80, "event_level": 0 } } if __name__ == "__main__": logging.info("==================== Start ====================") dst_es = Elasticsearch(hosts=config["dst_es"], sniff_on_start=True, sniff_on_connection_fail=True, timeout=120) bat = [] _id = 1 while True: item = copy.deepcopy(data) item["_id"] = _id bat.append(item) if (_id % 1000) == 0: helpers.bulk(client=dst_es, actions=bat, chunk_size=1000, max_chunk_bytes=209715200) bat = [] if (_id % 30000) == 0: logging.info("Write 30000 items to ES.") _id += 1 logging.info("==================== End ====================\n\n")