def cluster_status(es):
    """Print a human-readable status report for the given Elasticsearch client.

    Dumps cluster health, pending tasks, per-node document counts, and the
    cat-API shard allocation table to stdout.
    """
    cluster_client = ClusterClient(es)

    print("\nCLUSTER HEALTH")
    pprint(cluster_client.health())

    print("\nPENDING TASKS")
    pprint(cluster_client.pending_tasks())

    print("\nNODES")
    for info in get_nodes_info(es):
        print(info.name, info.docs)

    print("\nSHARD ALLOCATION")
    cat_client = CatClient(es)
    print(cat_client.allocation(v=True))
def cluster_status(es):
    """Print a human-readable status report for the given Elasticsearch client.

    Dumps cluster health, pending tasks, per-node document counts, and the
    cat-API shard allocation table to stdout.

    NOTE(fix): this duplicate of cluster_status used Python 2 ``print``
    statements, which are a SyntaxError under Python 3; converted to the
    print() function form used by the rest of the file.
    """
    cluster = ClusterClient(es)
    print("\nCLUSTER HEALTH")
    pprint(cluster.health())
    print("\nPENDING TASKS")
    pprint(cluster.pending_tasks())
    print("\nNODES")
    for node in get_nodes_info(es):
        print(node.name, node.docs)
    print("\nSHARD ALLOCATION")
    cat = CatClient(es)
    print(cat.allocation(v=True))
class Indexer:
    """Consumes JSON documents from a queue and bulk-indexes them into
    Elasticsearch under date-stamped indexes (``<prefix>-YYYY-MM-DD``)."""

    # Default index-name prefix; overridable via conf["index_prefix"].
    INDEX_PREFIX = "pulsar"

    def __init__(self, conf, queue):
        """Connect to Elasticsearch, install the index template, then block
        consuming documents from *queue* until the "STOP" sentinel arrives.

        :param conf: dict-like config; recognized keys: "host" (default
            "es"), "port" (default 9200), "index_prefix".
        :param queue: queue of JSON strings; ``queue.get()`` returning the
            string "STOP" ends consumption.
        :raises Exception: if the cluster health check reports no nodes.
        """
        self.conf = conf
        host = self.conf.get("host", "es")
        port = self.conf.get("port", 9200)
        self.log = logging.getLogger("pulsar.indexer")
        # Quiet the noisy elasticsearch transport logger.
        logging.getLogger("elasticsearch").setLevel(logging.INFO)
        self.log.debug("port: %r" % port)
        self.es = Elasticsearch([{"host": host, "port": port}])
        self.cluster_client = ClusterClient(self.es)
        health = self.cluster_client.health()
        if not health or health.get("number_of_nodes") < 1:
            raise Exception("No Elasticsearch nodes found: %r" % health)
        # Put our template
        self.indices_client = IndicesClient(self.es)
        self.index_prefix = self.conf.get("index_prefix", self.INDEX_PREFIX)
        # FIX: use a context manager so the template file handle is closed
        # promptly (the original open(...).read() leaked the handle).
        with open("conf/es-template.json") as template_file:
            self.indices_client.put_template(
                name=self.index_prefix, body=template_file.read())
        self.log.info("Put template to ES for pulsar indexes")
        self.last_event_time = time()
        self.index_prefix = self.index_prefix + "-"
        self.index_name = self.get_index_name()
        self.queue = queue
        # Throughput-stats bookkeeping: log the rate every stats_every docs.
        self.counter = 0
        self.stats_checkpoint = time()
        self.stats_every = 10000

        try:
            # This will block as it reads from the queue
            self.bulk(self.es, self.iterator(), stats_only=True)
        except Exception as e:
            self.log.exception("Error with bulk", exc_info=e)

    def bulk(self, client, actions, stats_only=False, **kwargs):
        """Run *actions* through parallel_bulk and tally the outcomes.

        :returns: ``(success_count, failed_count)`` when *stats_only* is
            true, otherwise ``(success_count, list_of_error_items)``.
        """
        success, failed = 0, 0
        # list of errors to be collected if not stats_only
        errors = []
        for ok, item in parallel_bulk(client, actions, **kwargs):
            # go through request-response pairs and detect failures
            if not ok:
                if not stats_only:
                    errors.append(item)
                failed += 1
            else:
                success += 1
        # Parenthesized for clarity: the conditional selects only the
        # second tuple element (same parse as the original).
        return success, (failed if stats_only else errors)

    def iterator(self):
        """Yield decoded JSON docs from the queue until the "STOP" sentinel,
        logging the indexing rate every ``stats_every`` documents."""
        for doc in (json.loads(x) for x in iter(self.queue.get, "STOP")):
            self.counter += 1
            if self.counter >= self.stats_every:
                took = time() - self.stats_checkpoint
                rate = float(self.counter) / took
                self.log.info("STATS: rate: %f" % rate)
                self.stats_checkpoint = time()
                self.counter = 0
            yield doc

    def get_index_name(self):
        """Return today's index name, e.g. ``"pulsar-2024-01-01"``."""
        return "%s%s" % (self.index_prefix,
                         datetime.date.today().isoformat())