def reindex_uptime():
    try:
        Datastores.es.indices.delete("rcae_uptime_ce_*")
    except elasticsearch.exceptions.NotFoundError as e:
        log.error(e)
    log.info("reindexing uptime")
    i = 0
    task_start = datetime.datetime.now()
    uptime_tables = detect_tables("ae_uptime_ce_metrics_p_")
    for partition_table in uptime_tables:
        # stream rows so large partitions are not loaded into memory at once
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = UptimeMetric(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info("round {}, {}".format(i, name))
                for k, v in es_docs.items():
                    # attach the target index/type metadata expected by the bulk helper
                    to_update = {"_index": k, "_type": "log"}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info("total docs {} {}".format(i, datetime.datetime.now() - task_start))

def reindex_slow_calls():
    try:
        Datastores.es.indices.delete('rcae_sc*')
    except elasticsearch.exceptions.NotFoundError as e:
        log.error(e)
    log.info('reindexing slow calls')
    i = 0
    task_start = datetime.datetime.now()
    slow_calls_tables = detect_tables('slow_calls_p_')
    for partition_table in slow_calls_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = SlowCall(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    # same bulk pattern as reindex_uptime: tag each doc with its
                    # target index/type, then hand the batch to the bulk helper
                    to_update = {'_index': k, '_type': 'log'}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info('total docs {} {}'.format(i, datetime.datetime.now() - task_start))
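
# Note: the fetch/bulk loop above is repeated for every document type in this
# module. A shared helper along the following lines could factor that pattern
# out. This is only an illustrative sketch -- `reindex_partition_tables`,
# `make_item`, and `doc_type` are hypothetical names, not part of the original
# code.
def reindex_partition_tables(table_prefix, make_item, doc_type, chunk_size=2000):
    task_start = datetime.datetime.now()
    total = 0
    for partition_table in detect_tables(table_prefix):
        # stream results so whole partitions are never held in memory
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(chunk_size)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                total += 1
                item = make_item(**dict(list(row.items())))
                es_docs[item.partition_id].append(item.es_doc())
            for k, v in es_docs.items():
                # attach the index/type metadata expected by the bulk helper
                for doc in v:
                    doc.update({"_index": k, "_type": doc_type})
                elasticsearch.helpers.bulk(Datastores.es, v)
            log.info("round {}, {}".format(total, partition_table.name))
    log.info("total docs {} {}".format(total, datetime.datetime.now() - task_start))

# With such a helper, reindex_slow_calls() above could reduce to:
#     reindex_partition_tables("slow_calls_p_", SlowCall, "log")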

def partitions_remove(request):
    permanent_partitions, daily_partitions = get_partition_stats()
    pg_partitions = []
    es_partitions = []
    for item in list(permanent_partitions.values()) + list(daily_partitions.values()):
        es_partitions.extend(item["elasticsearch"])
        pg_partitions.extend(item["pg"])
    FormCls = get_partition_deletion_form(es_partitions, pg_partitions)
    form = FormCls(
        es_index=request.unsafe_json_body["es_indices"],
        pg_index=request.unsafe_json_body["pg_indices"],
        confirm=request.unsafe_json_body["confirm"],
        csrf_context=request,
    )
    if form.validate():
        for ix in form.data["es_index"]:
            log.warning("deleting ES partition: {}".format(ix))
            Datastores.es.indices.delete(ix)
        for ix in form.data["pg_index"]:
            log.warning("deleting PG partition: {}".format(ix))
            stmt = sa.text("DROP TABLE %s CASCADE" % sa.text(ix))
            session = DBSession()
            session.connection().execute(stmt)
            mark_changed(session)

    for field, error in form.errors.items():
        msg = "%s: %s" % (field, error[0])
        request.session.flash(msg, "error")

    permanent_partitions, daily_partitions = get_partition_stats()
    return {
        "permanent_partitions": sorted(
            list(permanent_partitions.items()), key=lambda x: x[0], reverse=True
        ),
        "daily_partitions": sorted(
            list(daily_partitions.items()), key=lambda x: x[0], reverse=True
        ),
    }
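
# For reference, the JSON body partitions_remove() expects might look like the
# following; the index and table names are illustrative, and the exact
# confirmation value is whatever get_partition_deletion_form() requires:
#
#     {
#         "es_indices": ["rcae_r_2019_01"],
#         "pg_indices": ["reports_p_2019_01"],
#         "confirm": "CONFIRM"
#     }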

def reindex_reports():
    reports_groups_tables = detect_tables('reports_groups_p_')
    try:
        Datastores.es.indices.delete('rcae_r*')
    except elasticsearch.exceptions.NotFoundError as e:
        log.error(e)
    log.info('reindexing report groups')
    i = 0
    task_start = datetime.datetime.now()
    for partition_table in reports_groups_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = ReportGroup(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    # each doc carries its own "_id"; the bulk helper picks it
                    # up as document metadata
                    to_update = {'_index': k, '_type': 'report_group'}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info('total docs {} {}'.format(i, datetime.datetime.now() - task_start))

    i = 0
    log.info('reindexing reports')
    task_start = datetime.datetime.now()
    reports_tables = detect_tables('reports_p_')
    for partition_table in reports_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = Report(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    # report docs also carry "_parent", which the bulk helper
                    # uses to route them to their report group parent
                    to_update = {'_index': k, '_type': 'report'}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info('total docs {} {}'.format(i, datetime.datetime.now() - task_start))

    log.info('reindexing reports stats')
    i = 0
    task_start = datetime.datetime.now()
    reports_stats_tables = detect_tables('reports_stats_p_')
    for partition_table in reports_stats_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                rd = dict(list(row.items()))
                # remove legacy columns
                # TODO: remove the column later
                rd.pop('size', None)
                item = ReportStat(**rd)
                i += 1
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    to_update = {'_index': k, '_type': 'log'}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info('total docs {} {}'.format(i, datetime.datetime.now() - task_start))
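
# A minimal sketch of how these reindex functions might be wired into a
# command-line entry point. The option names, the choice list, and the use of
# pyramid.paster.bootstrap/setup_logging are assumptions for illustration;
# the application's real script may wire this up differently (e.g. with
# transaction handling around each step).
def main():
    import argparse
    from pyramid.paster import bootstrap, setup_logging

    handlers = {
        'uptime': reindex_uptime,
        'slow_calls': reindex_slow_calls,
        'reports': reindex_reports,
    }
    parser = argparse.ArgumentParser(description='Reindex data into Elasticsearch')
    parser.add_argument('-c', '--config', required=True,
                        help='path to the application ini file')
    parser.add_argument('-t', '--types', nargs='*',
                        choices=['all'] + list(handlers),
                        default=['all'],
                        help='which document types to reindex')
    args = parser.parse_args()
    setup_logging(args.config)
    # bootstrap configures the registry and database session for script use
    with bootstrap(args.config):
        selected = list(handlers) if 'all' in args.types else args.types
        for name in selected:
            handlers[name]()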