Exemple #1
0
def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = json.dumps([request.POST[x] for x in request.POST.keys() if 'match_txt' in x])
        search = Search(author=request.user,search_content=s_content,description=desc,dataset=Dataset.objects.get(pk=int(request.session['dataset'])),query=json.dumps(q))
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
Exemple #2
0
    def _get_tags_for_text_index(self, text, alignment, highlight_data):
        logger = LogManager(__name__, '_GET_TAGS_FOR_TEXT_INDEX')
        data_mapping = {index: datum for index, datum in enumerate(highlight_data)}
        data_index_to_spans = [datum['spans'] for datum in highlight_data]

        text_index_to_data_index = [[] for i in range(len(text))]
        for data_index, spans in enumerate(data_index_to_spans):
            for span in spans:
                for text_index in range(*span):
                    try:
                        text_index_to_data_index[alignment[text_index]].append(data_index)
                    except Exception as e:
                        # Throws exception when index out of range
                        # For example Gets index out of range if in _derive_highlight_data while uses >= instead of >
                        print('-- Exception[{0}] {1}'.format(__name__, e))
                        logger.set_context('text', text)
                        logger.exception('_get_tags_for_text_index try catch execption')

                        

        text_index_to_data_index = [frozenset(data_indices) for data_indices in text_index_to_data_index]

        spans_to_tags = [(spans, self._get_tag_from_highlight_data([data_mapping[data_index] for data_index in data_indices]))
                         for spans, data_indices in self._get_spans_to_data_indices(text_index_to_data_index)]

        return spans_to_tags
Exemple #3
0
    def remove_facts_from_document(self, rm_facts_dict, bs=7500):
        '''remove a certain fact from all documents given a [str]key and [str]val'''
        logger = LogManager(__name__, 'FACT MANAGER REMOVE FACTS')

        try:
            # Clears readonly block just in case the index has been set to read only
            self.es_m.clear_readonly_block()

            query = self._fact_deletion_query(rm_facts_dict)
            self.es_m.load_combined_query(query)
            response = self.es_m.scroll(size=bs, field_scroll=self.field)
            scroll_id = response['_scroll_id']
            total_docs = response['hits']['total']
            docs_left = total_docs  # DEBUG
            print('Starting.. Total docs - ', total_docs)  # DEBUG
            batch = 0
            while total_docs > 0:
                print('Docs left:', docs_left)  # DEBUG
                data = ''
                for document in response['hits']['hits']:
                    new_field = []  # The new facts field
                    for fact in document['_source'][self.field]:
                        # If the fact name is in rm_facts_dict keys
                        if fact["fact"] in rm_facts_dict:
                            # If the fact value is not in the delete key values
                            if fact['str_val'] not in rm_facts_dict.getlist(
                                    fact["fact"]):
                                new_field.append(fact)
                        else:
                            new_field.append(fact)
                    # Update dataset
                    data += json.dumps({
                        "update": {
                            "_id": document['_id'],
                            "_type": document['_type'],
                            "_index": document['_index']
                        }
                    }) + '\n'
                    document = {'doc': {self.field: new_field}}
                    data += json.dumps(document) + '\n'
                response = self.es_m.scroll(scroll_id=scroll_id,
                                            size=bs,
                                            field_scroll=self.field)
                total_docs = len(response['hits']['hits'])
                docs_left -= bs  # DEBUG
                scroll_id = response['_scroll_id']
                self.es_m.plain_post_bulk(self.es_m.es_url, data)
            print('DONE')  # DEBUG

            logger.set_context('docs_left', total_docs)
            logger.set_context('batch', batch)
            logger.info('remove_facts_from_document')
        except:
            print(traceback.format_exc())
            logger.set_context('es_params', self.es_params)
            logger.exception('remove_facts_from_document_failed')
Exemple #4
0
def delete(request):

    logger = LogManager(__name__, 'DELETE SEARCH')
    search_id = request.GET['pk']
    logger.set_context('user_name', request.user.username)
    logger.set_context('search_id', search_id)
    try:
        Search.objects.get(pk=search_id).delete()
        logger.info('search_deleted')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.exception('search_deletion_failed')

    return HttpResponse(search_id)
Exemple #5
0
def delete(request):
    post_data = json.loads(request.POST['data'])
    logger = LogManager(__name__, 'DELETE SEARCH')
    search_ids = post_data['pks']
    logger.set_context('user_name', request.user.username)
    logger.set_context('search_ids', search_ids)
    try:
        for search_id in search_ids:
            Search.objects.get(pk=search_id).delete()
            logger.info('search_deleted:' + search_id)

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.exception('search_deletion_failed')

    return HttpResponse()
Exemple #6
0
def delete(request):
    post_data = json.loads(request.POST['data'])
    logger = LogManager(__name__, 'DELETE SEARCH')
    search_ids = post_data['pks']
    logger.set_context('user_name', request.user.username)
    logger.set_context('search_ids', search_ids)
    try:
        for search_id in search_ids:
            Search.objects.get(pk=search_id).delete()
            logger.info('search_deleted:' + search_id)

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.exception('search_deletion_failed')

    return HttpResponse()
Exemple #7
0
def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = {}

        # make json
        for x in request.POST.keys():
            if 'match_txt' in x:
                # get the ID of the field, eg match_txt_1 returns 1 match_txt_1533 returns 1533
                field_id = x.rsplit("_", 1)[-1]
                match_field = request.POST['match_field_' + field_id]
                if match_field in s_content.keys():
                    s_content[match_field].append(request.POST[x])
                else:
                    s_content[match_field] = [request.POST[x]]

        search = Search(author=request.user,
                        search_content=json.dumps(s_content),
                        description=desc,
                        query=json.dumps(q))
        logger.info('Saving search for datasets: {}'.format(
            request.session['dataset']))
        search.save()
        for dataset_id in request.session['dataset']:
            dataset = Dataset.objects.get(pk=int(dataset_id))
            search.datasets.add(dataset)
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')
    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')
    return HttpResponse()
Exemple #8
0
def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = {}

        # make json
        for x in request.POST.keys():
            if 'match_txt' in x:
                # get the ID of the field, eg match_txt_1 returns 1 match_txt_1533 returns 1533
                field_id = x.rsplit("_", 1)[-1]
                match_field = request.POST['match_field_' + field_id]
                if match_field in s_content.keys():
                    s_content[match_field].append(request.POST[x])
                else:
                    s_content[match_field] = [request.POST[x]]

        search = Search(author=request.user, search_content=json.dumps(s_content), description=desc,
                        query=json.dumps(q))
        logger.info('Saving search for datasets: {}'.format(request.session['dataset']))
        search.save()
        for dataset_id in request.session['dataset']:
            dataset = Dataset.objects.get(pk=int(dataset_id))
            search.datasets.add(dataset)
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')
    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')
    return HttpResponse()
Exemple #9
0
def facts_agg(es_params, request):
    logger = LogManager(__name__, 'FACTS AGGREGATION')

    distinct_values = []
    query_results = []
    lexicon = []
    aggregation_data = es_params['aggregate_over']
    aggregation_data = json.loads(aggregation_data)
    original_aggregation_field = aggregation_data['path']
    aggregation_field = 'texta_link.facts'

    try:
        aggregation_size = 50
        aggregations = {"strings": {es_params['sort_by']: {"field": aggregation_field, 'size': 0}},
                        "distinct_values": {"cardinality": {"field": aggregation_field}}}

        # Define selected mapping
        ds = Datasets().activate_dataset(request.session)
        dataset = ds.get_index()
        mapping = ds.get_mapping()
        date_range = ds.get_date_range()
        es_m = ES_Manager(dataset, mapping, date_range)

        for item in es_params:
            if 'saved_search' in item:
                s = Search.objects.get(pk=es_params[item])
                name = s.description
                saved_query = json.loads(s.query)
                es_m.load_combined_query(saved_query)
                es_m.set_query_parameter('aggs', aggregations)
                response = es_m.search()

                # Filter response
                bucket_filter = '{0}.'.format(original_aggregation_field.lower())
                final_bucket = []
                for b in response['aggregations']['strings']['buckets']:
                    if bucket_filter in b['key']:
                        fact_name = b['key'].split('.')[-1]
                        b['key'] = fact_name
                        final_bucket.append(b)
                final_bucket = final_bucket[:aggregation_size]
                response['aggregations']['distinct_values']['value'] = len(final_bucket)
                response['aggregations']['strings']['buckets'] = final_bucket

                normalised_counts,labels = normalise_agg(response, es_m, es_params, 'strings')
                lexicon = list(set(lexicon+labels))
                query_results.append({'name':name,'data':normalised_counts,'labels':labels})
                distinct_values.append({'name':name,'data':response['aggregations']['distinct_values']['value']})


        es_m.build(es_params)
        # FIXME
        # this is confusing for the user
        if not es_m.is_combined_query_empty():
            es_m.set_query_parameter('aggs', aggregations)
            response = es_m.search()

            # Filter response
            bucket_filter = '{0}.'.format(original_aggregation_field.lower())
            final_bucket = []
            for b in response['aggregations']['strings']['buckets']:
                if bucket_filter in b['key']:
                    fact_name = b['key'].split('.')[-1]
                    b['key'] = fact_name
                    final_bucket.append(b)
            final_bucket = final_bucket[:aggregation_size]
            response['aggregations']['distinct_values']['value'] = len(final_bucket)
            response['aggregations']['strings']['buckets'] = final_bucket

            normalised_counts,labels = normalise_agg(response, es_m, es_params, 'strings')
            lexicon = list(set(lexicon+labels))
            query_results.append({'name':'Query','data':normalised_counts,'labels':labels})
            distinct_values.append({'name':'Query','data':response['aggregations']['distinct_values']['value']})

        data = [a+zero_list(len(query_results)) for a in map(list, zip(*[lexicon]))]
        data = [['Word']+[query_result['name'] for query_result in query_results]]+data

        for i,word in enumerate(lexicon):
            for j,query_result in enumerate(query_results):
                for k,label in enumerate(query_result['labels']):
                    if word == label:
                        data[i+1][j+1] = query_result['data'][k]

        logger.set_context('user_name', request.user.username)
        logger.info('facts_aggregation_queried')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('user_name', request.user.username)
        logger.exception('facts_aggregation_query_failed')

    table_height = len(data)*15
    table_height = table_height if table_height > 500 else 500
    return {'data':[data[0]]+sorted(data[1:], key=lambda x: sum(x[1:]), reverse=True),'height':table_height,'type':'bar','distinct_values':json.dumps(distinct_values)}