def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = json.dumps([request.POST[x] for x in request.POST.keys() if 'match_txt' in x])
        search = Search(author=request.user,
                        search_content=s_content,
                        description=desc,
                        dataset=Dataset.objects.get(pk=int(request.session['dataset'])),
                        query=json.dumps(q))
        search.save()

        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
def _get_tags_for_text_index(self, text, alignment, highlight_data):
    logger = LogManager(__name__, '_GET_TAGS_FOR_TEXT_INDEX')

    data_mapping = {index: datum for index, datum in enumerate(highlight_data)}
    data_index_to_spans = [datum['spans'] for datum in highlight_data]

    # For every character position of the text, collect the indices of the
    # highlight data entries whose spans cover that position.
    text_index_to_data_index = [[] for i in range(len(text))]
    for data_index, spans in enumerate(data_index_to_spans):
        for span in spans:
            for text_index in range(*span):
                try:
                    text_index_to_data_index[alignment[text_index]].append(data_index)
                except Exception as e:
                    # Raised when the aligned index is out of range, e.g. if
                    # _derive_highlight_data uses >= where > is expected.
                    print('-- Exception[{0}] {1}'.format(__name__, e))
                    logger.set_context('text', text)
                    logger.exception('_get_tags_for_text_index index out of range')

    text_index_to_data_index = [frozenset(data_indices) for data_indices in text_index_to_data_index]

    spans_to_tags = [
        (spans, self._get_tag_from_highlight_data([data_mapping[data_index] for data_index in data_indices]))
        for spans, data_indices in self._get_spans_to_data_indices(text_index_to_data_index)
    ]

    return spans_to_tags
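# Illustrative sketch (not part of the view code above): how highlight spans are mapped onto
# character positions, assuming 'spans' are (start, end) pairs and 'alignment' maps raw-text
# indices to highlighted-text indices. The toy alignment and highlight data are hypothetical.
def _sketch_text_index_mapping():
    text = 'abcdef'
    alignment = list(range(len(text)))          # identity alignment, for the sketch only
    highlight_data = [{'spans': [(0, 3)]},      # first highlighter covers "abc"
                      {'spans': [(2, 5)]}]      # second highlighter covers "cde"

    text_index_to_data_index = [[] for _ in range(len(text))]
    for data_index, datum in enumerate(highlight_data):
        for span in datum['spans']:
            for text_index in range(*span):
                text_index_to_data_index[alignment[text_index]].append(data_index)

    # Position 2 ("c") is covered by both highlighters -> frozenset({0, 1})
    return [frozenset(indices) for indices in text_index_to_data_index]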
def remove_facts_from_document(self, rm_facts_dict, bs=7500):
    """Remove the given facts from all documents; rm_facts_dict maps fact names to the values to delete."""
    logger = LogManager(__name__, 'FACT MANAGER REMOVE FACTS')

    try:
        # Clear the readonly block, in case the index has been set to read-only
        self.es_m.clear_readonly_block()

        query = self._fact_deletion_query(rm_facts_dict)
        self.es_m.load_combined_query(query)
        response = self.es_m.scroll(size=bs, field_scroll=self.field)
        scroll_id = response['_scroll_id']
        total_docs = response['hits']['total']
        docs_left = total_docs  # DEBUG
        print('Starting.. Total docs - ', total_docs)  # DEBUG
        batch = 0

        while total_docs > 0:
            print('Docs left:', docs_left)  # DEBUG
            data = ''

            for document in response['hits']['hits']:
                new_field = []  # The new facts field
                for fact in document['_source'][self.field]:
                    # If the fact name is one of the keys marked for deletion
                    if fact['fact'] in rm_facts_dict:
                        # Keep the fact only if its value is not among the values to delete
                        if fact['str_val'] not in rm_facts_dict.getlist(fact['fact']):
                            new_field.append(fact)
                    else:
                        new_field.append(fact)

                # Append a bulk update action with the filtered facts field
                data += json.dumps({
                    'update': {
                        '_id': document['_id'],
                        '_type': document['_type'],
                        '_index': document['_index']
                    }
                }) + '\n'
                document = {'doc': {self.field: new_field}}
                data += json.dumps(document) + '\n'

            response = self.es_m.scroll(scroll_id=scroll_id, size=bs, field_scroll=self.field)
            total_docs = len(response['hits']['hits'])
            docs_left -= bs  # DEBUG
            scroll_id = response['_scroll_id']
            self.es_m.plain_post_bulk(self.es_m.es_url, data)
            batch += 1

        print('DONE')  # DEBUG

        logger.set_context('docs_left', total_docs)
        logger.set_context('batch', batch)
        logger.info('remove_facts_from_document')

    except Exception:
        print(traceback.format_exc())
        logger.set_context('es_params', self.es_params)
        logger.exception('remove_facts_from_document_failed')
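# Illustrative sketch of the fact-filtering step above (assumption: rm_facts_dict behaves like
# Django's QueryDict, whose .getlist(key) returns all submitted values for that key; here a
# plain {fact_name: [values_to_delete]} dict stands in for it). Each fact mirrors the documents'
# facts field entries with a 'fact' name and a 'str_val' value.
def _sketch_filter_facts(facts, rm_facts):
    """Return the facts that survive deletion."""
    new_field = []
    for fact in facts:
        if fact['fact'] in rm_facts:
            # Keep the fact only if its value is not marked for deletion
            if fact['str_val'] not in rm_facts[fact['fact']]:
                new_field.append(fact)
        else:
            new_field.append(fact)
    return new_field

# Example: removing only the value 'positive' of the fact 'SENTIMENT'
# _sketch_filter_facts([{'fact': 'SENTIMENT', 'str_val': 'positive'},
#                       {'fact': 'SENTIMENT', 'str_val': 'negative'}],
#                      {'SENTIMENT': ['positive']})
# -> [{'fact': 'SENTIMENT', 'str_val': 'negative'}]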
def delete(request):
    logger = LogManager(__name__, 'DELETE SEARCH')

    search_id = request.GET['pk']
    logger.set_context('user_name', request.user.username)
    logger.set_context('search_id', search_id)

    try:
        Search.objects.get(pk=search_id).delete()
        logger.info('search_deleted')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.exception('search_deletion_failed')

    return HttpResponse(search_id)
def delete(request):
    post_data = json.loads(request.POST['data'])

    logger = LogManager(__name__, 'DELETE SEARCH')
    search_ids = post_data['pks']
    logger.set_context('user_name', request.user.username)
    logger.set_context('search_ids', search_ids)

    try:
        for search_id in search_ids:
            Search.objects.get(pk=search_id).delete()
            logger.info('search_deleted: {}'.format(search_id))

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.exception('search_deletion_failed')

    return HttpResponse()
def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']

        # Group the submitted match texts by their match field
        s_content = {}
        for x in request.POST.keys():
            if 'match_txt' in x:
                # Get the ID suffix of the field, e.g. match_txt_1 returns 1, match_txt_1533 returns 1533
                field_id = x.rsplit('_', 1)[-1]
                match_field = request.POST['match_field_' + field_id]
                if match_field in s_content.keys():
                    s_content[match_field].append(request.POST[x])
                else:
                    s_content[match_field] = [request.POST[x]]

        search = Search(author=request.user,
                        search_content=json.dumps(s_content),
                        description=desc,
                        query=json.dumps(q))
        logger.info('Saving search for datasets: {}'.format(request.session['dataset']))
        search.save()

        for dataset_id in request.session['dataset']:
            dataset = Dataset.objects.get(pk=int(dataset_id))
            search.datasets.add(dataset)
        search.save()

        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
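# Illustrative sketch (toy POST data in a plain dict, not Django's QueryDict): how the
# match_txt_<id> / match_field_<id> parameter pairs are grouped into the search_content
# dict that the view above serialises and saves.
def _sketch_group_search_content(post):
    s_content = {}
    for key in post:
        if 'match_txt' in key:
            field_id = key.rsplit('_', 1)[-1]                # e.g. match_txt_1533 -> 1533
            match_field = post['match_field_' + field_id]
            s_content.setdefault(match_field, []).append(post[key])
    return s_content

# _sketch_group_search_content({'match_txt_1': 'foo', 'match_field_1': 'text',
#                               'match_txt_2': 'bar', 'match_field_2': 'text'})
# -> {'text': ['foo', 'bar']}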
def facts_agg(es_params, request):
    logger = LogManager(__name__, 'FACTS AGGREGATION')

    distinct_values = []
    query_results = []
    lexicon = []
    data = []

    aggregation_data = es_params['aggregate_over']
    aggregation_data = json.loads(aggregation_data)
    original_aggregation_field = aggregation_data['path']
    aggregation_field = 'texta_link.facts'

    try:
        aggregation_size = 50
        aggregations = {
            'strings': {es_params['sort_by']: {'field': aggregation_field, 'size': 0}},
            'distinct_values': {'cardinality': {'field': aggregation_field}}
        }

        # Define selected mapping
        ds = Datasets().activate_dataset(request.session)
        dataset = ds.get_index()
        mapping = ds.get_mapping()
        date_range = ds.get_date_range()
        es_m = ES_Manager(dataset, mapping, date_range)

        for item in es_params:
            if 'saved_search' in item:
                s = Search.objects.get(pk=es_params[item])
                name = s.description
                saved_query = json.loads(s.query)
                es_m.load_combined_query(saved_query)
                es_m.set_query_parameter('aggs', aggregations)
                response = es_m.search()

                # Keep only the buckets that belong to the selected aggregation field
                bucket_filter = '{0}.'.format(original_aggregation_field.lower())
                final_bucket = []
                for b in response['aggregations']['strings']['buckets']:
                    if bucket_filter in b['key']:
                        fact_name = b['key'].split('.')[-1]
                        b['key'] = fact_name
                        final_bucket.append(b)
                final_bucket = final_bucket[:aggregation_size]
                response['aggregations']['distinct_values']['value'] = len(final_bucket)
                response['aggregations']['strings']['buckets'] = final_bucket

                normalised_counts, labels = normalise_agg(response, es_m, es_params, 'strings')
                lexicon = list(set(lexicon + labels))
                query_results.append({'name': name, 'data': normalised_counts, 'labels': labels})
                distinct_values.append({'name': name, 'data': response['aggregations']['distinct_values']['value']})

        es_m.build(es_params)

        # FIXME: this is confusing for the user
        if not es_m.is_combined_query_empty():
            es_m.set_query_parameter('aggs', aggregations)
            response = es_m.search()

            # Keep only the buckets that belong to the selected aggregation field
            bucket_filter = '{0}.'.format(original_aggregation_field.lower())
            final_bucket = []
            for b in response['aggregations']['strings']['buckets']:
                if bucket_filter in b['key']:
                    fact_name = b['key'].split('.')[-1]
                    b['key'] = fact_name
                    final_bucket.append(b)
            final_bucket = final_bucket[:aggregation_size]
            response['aggregations']['distinct_values']['value'] = len(final_bucket)
            response['aggregations']['strings']['buckets'] = final_bucket

            normalised_counts, labels = normalise_agg(response, es_m, es_params, 'strings')
            lexicon = list(set(lexicon + labels))
            query_results.append({'name': 'Query', 'data': normalised_counts, 'labels': labels})
            distinct_values.append({'name': 'Query', 'data': response['aggregations']['distinct_values']['value']})

        # Build a table: one row per lexicon word, one count column per query result
        data = [[word] + zero_list(len(query_results)) for word in lexicon]
        data = [['Word'] + [query_result['name'] for query_result in query_results]] + data

        for i, word in enumerate(lexicon):
            for j, query_result in enumerate(query_results):
                for k, label in enumerate(query_result['labels']):
                    if word == label:
                        data[i + 1][j + 1] = query_result['data'][k]

        logger.set_context('user_name', request.user.username)
        logger.info('facts_aggregation_queried')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('user_name', request.user.username)
        logger.exception('facts_aggregation_query_failed')

    table_height = len(data) * 15
    table_height = table_height if table_height > 500 else 500

    return {'data': ([data[0]] + sorted(data[1:], key=lambda x: sum(x[1:]), reverse=True)) if data else [],
            'height': table_height,
            'type': 'bar',
            'distinct_values': json.dumps(distinct_values)}
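# Illustrative sketch (toy aggregation response, hypothetical field names): how the terms
# buckets are filtered down to the facts that belong to the selected aggregation path,
# keeping only the fact name as the bucket key.
def _sketch_filter_fact_buckets(buckets, original_aggregation_field, aggregation_size=50):
    bucket_filter = '{0}.'.format(original_aggregation_field.lower())
    final_bucket = []
    for b in buckets:
        if bucket_filter in b['key']:
            b['key'] = b['key'].split('.')[-1]       # keep only the fact name
            final_bucket.append(b)
    return final_bucket[:aggregation_size]

# _sketch_filter_fact_buckets([{'key': 'comment_text.sentiment', 'doc_count': 7},
#                              {'key': 'title.sentiment', 'doc_count': 3}],
#                             'comment_text')
# -> [{'key': 'sentiment', 'doc_count': 7}]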