def qa_matrix(index, sources_config):
    """Print a per-source field-coverage matrix for an Elasticsearch index.

    Runs one terms aggregation on ``meta.source_id`` to get the total number
    of documents per source, then one more aggregation per entry of
    ``BaseItem.combined_index_fields``, restricted to the documents in which
    that field exists. The collected counts are pretty-printed and then
    written to stdout as a table of 24-character-wide, left-aligned columns
    (one row per source, one column per field).

    Args:
        index: Name of the index to query; passed straight to ``es.search``.
        sources_config: Passed to ``load_sources_config``; the number of
            sources it returns is used as the bucket limit (``size``) of the
            terms aggregations.

    Returns:
        None. All output goes to stdout.
    """
    # Sort once so the header and every data row use the same column order.
    field_names = sorted(BaseItem.combined_index_fields)
    sources = load_sources_config(sources_config)

    # Fragments shared by the "all documents" query and the per-field ones.
    match_all = {'constant_score': {'query': {'match_all': {}}}}
    per_source = {
        'source_id': {
            'terms': {'field': 'meta.source_id', 'size': len(sources)},
        }
    }

    all_result = es.search(
        index=index,
        body={'query': match_all, 'aggs': per_source, 'size': 0})
    all_counts = {
        bucket['key']: {'all': bucket['doc_count']}
        for bucket in all_result['aggregations']['source_id']['buckets']
    }

    for field in field_names:
        body = {
            'query': {
                'filtered': {
                    'query': match_all,
                    'filter': {'exists': {'field': field}},
                }
            },
            'aggs': per_source,
            'size': 0,
        }
        result = es.search(index=index, body=body)
        for bucket in result['aggregations']['source_id']['buckets']:
            all_counts[bucket['key']][field] = bucket['doc_count']

    pprint(all_counts)

    row_format = "{:<24}" * (len(field_names) + 2)

    # The two fixed labels must stay in front: sorting them together with the
    # field names shifted the header relative to the data rows below.
    # print(<single expression>) behaves the same as a Python 2 print
    # statement and as the print() function.
    print(row_format.format(*(['Source', 'all'] + field_names)))
    for source_id, counts in all_counts.items():
        row = [source_id, counts['all']]
        # A source with no documents carrying a field has no bucket for it.
        row.extend(counts.get(name, 0) for name in field_names)
        print(row_format.format(*row))
def api_request(self, index_name, doc_type, query=None, *args, **kwargs):
    """Build a search body from keyword arguments and run it via ``es.search``.

    The body starts from these defaults::

        {"filters": {}, "from": 0, "size": 10, "sort": "_score",
         "order": "asc"}

    plus ``"query"`` when ``query`` is given. A keyword argument whose name
    matches one of those top-level keys replaces the default. Every other
    keyword argument becomes an entry in ``"filters"``:

    * a string ``v``  -> ``{"terms": [v]}``
    * a list ``v``    -> ``{"terms": v}``
    * anything else   -> stored unchanged

    ``doc_type`` is always added as the ``'@type'`` filter and overrides a
    caller-supplied ``'@type'`` keyword.

    Args:
        index_name: Index to search; passed as ``index`` to ``es.search``.
        doc_type: Value for the ``'@type'`` filter.
        query: Optional value for the ``"query"`` key of the body.
        *args: Accepted for call compatibility; not used.
        **kwargs: Top-level overrides and filters, as described above.

    Returns:
        Whatever ``es.search`` returns for the assembled body.
    """
    api_query = {
        "filters": {},
        "from": 0,
        "size": 10,
        "sort": "_score",
        "order": "asc",
    }
    if query is not None:
        api_query["query"] = query

    kwargs['@type'] = doc_type

    # First pass: apply top-level overrides and set the filters aside.
    # Doing this in a single loop made the result depend on dict iteration
    # order: a ``filters=`` keyword replaced the filters dict and dropped
    # every filter (including '@type') that had been added before it.
    filter_args = {}
    for key, value in kwargs.items():
        if key in api_query:
            api_query[key] = value
        else:
            filter_args[key] = value

    # Work on a copy so a caller-supplied ``filters`` dict is not mutated.
    filters = dict(api_query["filters"])
    api_query["filters"] = filters

    # Second pass: translate the remaining keywords into filter clauses.
    for key, value in filter_args.items():
        if isinstance(value, basestring):  # Python 2: str or unicode
            filters[key] = {"terms": [value]}
        elif isinstance(value, list):
            filters[key] = {"terms": value}
        else:
            filters[key] = value

    return es.search(index=index_name, body=api_query)
def enrich_item(self, enrichments, object_id, combined_index_doc, doc):
    """Attach percolator matches for each sub-item to ``enrichments``.

    Every entry of ``combined_index_doc['item']['items']`` whose ``'@type'``
    (``'Note'`` when absent) is listed in
    ``settings.ENRICHER_PERCOLATOR_AS2_TYPES`` is sent as a percolate query
    against ``settings.COMBINED_INDEX``. When the response reports at least
    one hit, the ids of the returned hits are turned into
    ``<ENRICHER_PERCOLATOR_BASE_HREF>/<hit id>`` strings and stored under the
    sub-item's ``'@id'`` in ``enrichments['percolations']``.

    Args:
        enrichments: Dict that receives the ``'percolations'`` mapping; any
            existing value under that key is replaced.
        object_id: Not used by this method.
        combined_index_doc: Document whose ``item.items`` list is percolated.
        doc: Not used by this method.

    Returns:
        The same ``enrichments`` object that was passed in.
    """
    percolations = {}
    enrichments['percolations'] = percolations

    sub_items = combined_index_doc.get('item', {}).get('items', [])
    for sub_item in sub_items:
        as2_type = sub_item.get('@type', 'Note')
        if as2_type not in settings.ENRICHER_PERCOLATOR_AS2_TYPES:
            continue

        percolate_clause = {
            "field": "query",
            "document_type": as2_type,
            "document": {'item': sub_item},
        }
        response = es.search(
            index=settings.COMBINED_INDEX,
            body={"query": {"percolate": percolate_clause}})

        hit_total = response.get('hits', {}).get('total', 0)
        if hit_total > 0:
            tag_hrefs = []
            for hit in response['hits']['hits']:
                tag_hrefs.append('%s/%s' % (
                    settings.ENRICHER_PERCOLATOR_BASE_HREF, hit['_id']))
            percolations[sub_item['@id']] = tag_hrefs

    log.info('Percolation final results:')
    log.info(enrichments['percolations'])

    return enrichments
def qa_matrix(index, sources_config):
    """Print a per-source field-coverage matrix for an Elasticsearch index.

    Runs one terms aggregation on ``meta.source_id`` to get the total number
    of documents per source, then one more aggregation per entry of
    ``BaseItem.combined_index_fields``, restricted to the documents in which
    that field exists. The collected counts are pretty-printed and then
    written to stdout as a table of 24-character-wide, left-aligned columns
    (one row per source, one column per field).

    Args:
        index: Name of the index to query; passed straight to ``es.search``.
        sources_config: Passed to ``load_sources_config``; the number of
            sources it returns is used as the bucket limit (``size``) of the
            terms aggregations.

    Returns:
        None. All output goes to stdout.
    """
    # Sort once so the header and every data row use the same column order.
    field_names = sorted(BaseItem.combined_index_fields)
    sources = load_sources_config(sources_config)

    # Fragments shared by the "all documents" query and the per-field ones.
    match_all = {'constant_score': {'query': {'match_all': {}}}}
    per_source = {
        'source_id': {
            'terms': {'field': 'meta.source_id', 'size': len(sources)},
        }
    }

    all_result = es.search(
        index=index,
        body={'query': match_all, 'aggs': per_source, 'size': 0})
    all_counts = {
        bucket['key']: {'all': bucket['doc_count']}
        for bucket in all_result['aggregations']['source_id']['buckets']
    }

    for field in field_names:
        body = {
            'query': {
                'filtered': {
                    'query': match_all,
                    'filter': {'exists': {'field': field}},
                }
            },
            'aggs': per_source,
            'size': 0,
        }
        result = es.search(index=index, body=body)
        for bucket in result['aggregations']['source_id']['buckets']:
            all_counts[bucket['key']][field] = bucket['doc_count']

    pprint(all_counts)

    row_format = "{:<24}" * (len(field_names) + 2)

    # The two fixed labels must stay in front: sorting them together with the
    # field names shifted the header relative to the data rows below.
    # print(<single expression>) behaves the same as a Python 2 print
    # statement and as the print() function.
    print(row_format.format(*(['Source', 'all'] + field_names)))
    for source_id, counts in all_counts.items():
        row = [source_id, counts['all']]
        # A source with no documents carrying a field has no bucket for it.
        row.extend(counts.get(name, 0) for name in field_names)
        print(row_format.format(*row))