def cluster_status(es):
    """Print a diagnostic summary of an Elasticsearch cluster.

    Sections printed: cluster health, pending tasks, per-node document
    counts, and the cat-API shard allocation table.
    """
    cluster_api = ClusterClient(es)
    sections = (
        ("CLUSTER HEALTH", cluster_api.health),
        ("PENDING TASKS", cluster_api.pending_tasks),
    )
    for title, fetch in sections:
        print("\n" + title)
        pprint(fetch())
    print("\nNODES")
    for node_info in get_nodes_info(es):
        print(node_info.name, node_info.docs)
    print("\nSHARD ALLOCATION")
    print(CatClient(es).allocation(v=True))
Example #2
0
def cluster_status(es):
    """Print cluster health, pending tasks, per-node doc counts, and shard allocation.

    Fixed: the original used Python 2 ``print`` statements, which are a
    SyntaxError under Python 3 and inconsistent with the first example on
    this page. Converted to ``print()`` calls; output is byte-identical
    (space-separated arguments, trailing newline).
    """
    cluster = ClusterClient(es)
    print("\nCLUSTER HEALTH")
    pprint(cluster.health())
    print("\nPENDING TASKS")
    pprint(cluster.pending_tasks())
    print("\nNODES")
    for node in get_nodes_info(es):
        print(node.name, node.docs)
    print("\nSHARD ALLOCATION")
    cat = CatClient(es)
    print(cat.allocation(v=True))
Example #3
0
class Indexer:
    """Consume JSON documents from a queue and bulk-index them into Elasticsearch."""

    # Default index name prefix; overridable via conf["index_prefix"].
    INDEX_PREFIX = "pulsar"

    def __init__(self, conf, queue):
        """Connect to Elasticsearch, install the index template, and drain *queue*.

        conf: mapping with optional keys "host" (default "es"),
              "port" (default 9200), and "index_prefix".
        queue: a queue whose .get() yields JSON strings, terminated by "STOP".

        Raises Exception when the cluster reports no nodes. Note this
        constructor blocks, bulk-indexing until the "STOP" sentinel arrives.
        """
        self.conf = conf
        host = self.conf.get("host", "es")
        port = self.conf.get("port", 9200)
        self.log = logging.getLogger("pulsar.indexer")
        # Quiet the noisy elasticsearch transport logger.
        logging.getLogger("elasticsearch").setLevel(logging.INFO)
        self.log.debug("port: %r" % port)
        self.es = Elasticsearch([{"host": host, "port": port}])
        self.cluster_client = ClusterClient(self.es)
        health = self.cluster_client.health()
        # Default to 0 so a health dict missing "number_of_nodes" raises our
        # Exception below instead of a TypeError from None < 1.
        if not health or health.get("number_of_nodes", 0) < 1:
            raise Exception("No Elasticsearch nodes found: %r" % health)
        # Put our template
        self.indices_client = IndicesClient(self.es)
        self.index_prefix = self.conf.get("index_prefix", self.INDEX_PREFIX)
        # Use a context manager so the template file handle is closed promptly
        # (the original open(...).read() leaked it until GC).
        with open("conf/es-template.json") as template_file:
            self.indices_client.put_template(
                name=self.index_prefix, body=template_file.read())
        self.log.info("Put template to ES for pulsar indexes")
        self.last_event_time = time()
        self.index_prefix = self.index_prefix + "-"
        self.index_name = self.get_index_name()
        self.queue = queue
        self.counter = 0
        self.stats_checkpoint = time()
        self.stats_every = 10000

        try:
            # This will block as it reads from the queue
            self.bulk(self.es, self.iterator(), stats_only=True)
        except Exception as e:
            self.log.exception("Error with bulk", exc_info=e)

    def bulk(self, client, actions, stats_only=False, **kwargs):
        """Feed *actions* through parallel_bulk and summarize the outcome.

        Returns (success_count, failure_count) when stats_only is true,
        otherwise (success_count, error_items) — mirroring
        elasticsearch.helpers.bulk.
        """
        success, failed = 0, 0

        # list of errors to be collected if not stats_only
        errors = []

        for ok, item in parallel_bulk(client, actions, **kwargs):
            # go through request-response pairs and detect failures
            if not ok:
                if not stats_only:
                    errors.append(item)
                failed += 1
            else:
                success += 1

        # Parenthesized for clarity: the conditional selects only the second
        # tuple element (counts vs. the collected error items).
        return success, (failed if stats_only else errors)

    def iterator(self):
        """Yield parsed JSON docs from the queue until "STOP", logging throughput.

        Every self.stats_every documents, log the indexing rate (docs/sec
        since the last checkpoint) and reset the counter.
        """
        for doc in (json.loads(x) for x in iter(self.queue.get, "STOP")):
            self.counter += 1
            if self.counter >= self.stats_every:
                took = time() - self.stats_checkpoint
                rate = float(self.counter) / took
                self.log.info("STATS: rate: %f" % rate)
                self.stats_checkpoint = time()
                self.counter = 0
            yield doc

    def get_index_name(self):
        """Return today's index name: "<prefix>-YYYY-MM-DD" (prefix already ends in "-")."""
        return "%s%s" % (self.index_prefix, datetime.date.today().isoformat())