def cluster_status(es):
    """Print a multi-section status report for the cluster behind *es*.

    Sections, in order: cluster health, pending tasks, one line per entry
    yielded by ``get_nodes_info(es)`` (its ``name`` and ``docs``), and the
    shard allocation table from the cat API.

    :param es: client object handed to ``ClusterClient``, ``CatClient`` and
        ``get_nodes_info``
    """
    cluster_api = ClusterClient(es)

    # The first two sections share the same shape: a heading followed by a
    # pretty-printed API response.
    sections = (
        ("\nCLUSTER HEALTH", cluster_api.health),
        ("\nPENDING TASKS", cluster_api.pending_tasks),
    )
    for heading, fetch in sections:
        print(heading)
        pprint(fetch())

    print("\nNODES")
    for info in get_nodes_info(es):
        print(info.name, info.docs)

    print("\nSHARD ALLOCATION")
    print(CatClient(es).allocation(v=True))
def cluster_status(es):
    """Print a multi-section status report for the cluster behind *es*.

    Sections, in order: cluster health, pending tasks, one line per entry
    yielded by ``get_nodes_info(es)`` (its ``name`` and ``docs``), and the
    shard allocation table from the cat API.

    Uses single-argument ``print(...)`` calls so the function parses and
    produces the same text on both Python 2 and Python 3.

    :param es: client object handed to ``ClusterClient``, ``CatClient`` and
        ``get_nodes_info``
    """
    cluster = ClusterClient(es)
    print("\nCLUSTER HEALTH")
    pprint(cluster.health())
    print("\nPENDING TASKS")
    pprint(cluster.pending_tasks())
    print("\nNODES")
    for node in get_nodes_info(es):
        # Format explicitly: a two-argument print(...) would emit a tuple
        # under Python 2 without ``from __future__ import print_function``.
        print("{} {}".format(node.name, node.docs))
    print("\nSHARD ALLOCATION")
    cat = CatClient(es)
    print(cat.allocation(v=True))
Esempio n. 3
0
    def __to_tf_idf(client, index, file_id):
        """Build the normalised TF-IDF vector of one indexed document.

        Term and document frequencies of the ``text`` field are read through
        the term-vectors API. Each term's weight is
        ``(freq / max_freq) * log2(document_count / doc_freq)``, where
        ``max_freq`` is the largest term frequency in the document and
        ``document_count`` is the index's document count from the cat API.

        :param client: Elasticsearch client used for the term-vectors and
            count requests
        :param index: name of the index holding the document
        :param file_id: id of the document to vectorise
        :return: ``Rocchio.__normalize`` applied to the
            ``{term: weight}`` mapping
        """
        def tf_idf(params):
            # params pairs the (term, term_freq) entry with the matching
            # (term, doc_freq) entry; both lists are sorted by term.
            (term, freq), (_, doc_freq) = params
            tf = freq / max_freq
            idf = np.log2(document_count / doc_freq)
            return term, tf * idf

        def document_term_vector():
            tv = client.termvectors(index=index,
                                    doc_type='document',
                                    id=file_id,
                                    fields=['text'],
                                    positions=False,
                                    term_statistics=True)
            file_td, file_df = {}, {}
            if 'text' in tv['term_vectors']:
                for term, stats in tv['term_vectors']['text']['terms'].items():
                    file_td[term] = stats['term_freq']
                    file_df[term] = stats['doc_freq']
            # Sorting both item lists by term keeps them aligned for zip().
            return sorted(file_td.items()), sorted(file_df.items())

        term_frequency, document_appearances = document_term_vector()
        if not term_frequency:
            # The document has no 'text' term vector; max() over the empty
            # frequency list would raise ValueError. Skip the count request
            # and hand back an empty vector instead.
            # NOTE(review): assumes __normalize accepts an empty dict - confirm.
            return Rocchio.__normalize({})

        max_freq = max(f for _, f in term_frequency)
        document_count = int(
            CatClient(client).count(index=[index], format='json')[0]['count'])

        vector = dict(map(tf_idf, zip(term_frequency, document_appearances)))
        return Rocchio.__normalize(vector)
def doc_count(client, index):
    """
    Return the document count the cat API reports for an index.

    :param client: client object wrapped in a ``CatClient``
    :param index: index name, sent to the count endpoint as a one-item list
    :return: the ``count`` field of the first response row, as an ``int``
    """
    rows = CatClient(client).count(index=[index], format='json')
    first_row = rows[0]
    return int(first_row['count'])
Esempio n. 5
0
def doc_count(client, index):
    """
    Return the document count the cat API reports for an index.

    :param client: ElasticSearch client, wrapped in a ``CatClient``
    :param index: index to count documents from
    :return: the ``count`` field of the first response row, as an ``int``
    """
    cat_api = CatClient(client)
    response = cat_api.count(index=[index], format='json')
    return int(response[0]['count'])
Esempio n. 6
0
def relocate():
    """Load the configuration and create the Elasticsearch client handles.

    Reads ``cluster_address`` from ``config()`` and builds an
    ``Elasticsearch`` client plus ``CatClient`` and ``ClusterClient``
    wrappers around it. If any of those constructors raises, an error
    message is printed and the process exits with status 1.

    NOTE(review): ``escat`` and ``escluster`` are not used in the visible
    body; this snippet looks truncated - confirm against the full source.
    """
    conf = config()

    try:
        es = Elasticsearch(conf['cluster_address'])
        escat = CatClient(es)
        escluster = ClusterClient(es)
    # ``except Exception, e`` is Python-2-only syntax; the ``as`` form is
    # accepted by Python 2.6+ and Python 3.
    except Exception as e:
        print("Unable to connect to ES cluster. Reason: {}".format(e))
        sys.exit(1)
Esempio n. 7
0
def doc_count(client, index):
    """Return, as an int, the ``count`` field of the first row of the cat
    count response for *index*."""
    count_rows = CatClient(client).count(index=[index], format='json')
    return int(count_rows[0]['count'])
def shard_status(es):
    """Print the cat API shard listing (requested with ``v=True``).

    Uses a single-argument ``print(...)`` call so the function parses and
    prints the same text on both Python 2 and Python 3.

    :param es: client object wrapped in a ``CatClient``
    """
    cat = CatClient(es)
    print(cat.shards(v=True))
Esempio n. 9
0
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.client import CatClient
import json
# Address of the Elasticsearch node this script inspects.
host = '192.168.15.168'

es = Elasticsearch(hosts=host)
print(es.ping())

index_cli = IndicesClient(es)
# print(index_cli)
cat_cli = CatClient(es)

# Request only the ``index`` column. Picking the third whitespace-separated
# field out of the default listing depends on the column layout, and
# dropping the last line depends on a trailing newline being present.
all_index = cat_cli.indices(h='index')
# print(all_index)

# get all index names: one name per non-empty line
index_list_source = all_index.splitlines()
index_list = [line.strip() for line in index_list_source if line.strip()]

# print(index_list)

# get index mapping info
for i in index_list:
    mapping_info = index_cli.get_mapping(index=i)
    # Serialised for the (currently disabled) debug print below.
    mapping_info = json.dumps(mapping_info, indent=2, ensure_ascii=False)
    print('============')
    # print(i,mapping_info)
def shard_status(es):
    """Print the cat API shard listing (requested with ``v=True``).

    :param es: client object wrapped in a ``CatClient``
    """
    shard_table = CatClient(es).shards(v=True)
    print(shard_table)
Esempio n. 11
0
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.client import CatClient
import json

# Address of the Elasticsearch node this script talks to.
host = '127.0.0.1:9200'
es = Elasticsearch(hosts=host)
cat_cli = CatClient(es)
index_cli = IndicesClient(es)

# Despite its name, ``index_list`` holds whatever ``indices()`` returns
# (no ``format`` is requested); it is printed as-is.
index_list = cat_cli.indices()
print(index_list)

# Fetch and print the settings of the index named "test_index0".
result = index_cli.get_settings(index="test_index0")
# result = json.dumps(result,indent=2)
print(result)