Example No. 1
def wc(query):
    stop_words = set(stopwords.words('english'))
    # generating a corpus specific stopword list
    stopset_state_specific = {'review', 'na', 'declassifiedreleased', 'review', 'unclassified', 'confidential',
                              'secret', 'disposition', 'released', 'approved', 'document', 'classification',
                              'restrictions', 'state', 'department', 'date', 'eo', 'handling'}
    stop_set = stop_words.union(stopset_state_specific)
    q = {
        "query": {
            "match": {
                "file": query
            }
        }
    }
    r = es.search(body=q,
                  index=es_index,
                  fields=["file", "body"])
    # switched to return 'body' instead of 'file':
    # 'body' is the portion of the 'file' that has been regex'd by the uploader
    # to include the most relevant information (e.g. excluding headers)
    data = r['hits']['hits'][0]['fields']['body'][0]
    # no_white = re.sub('\s', ' ', data)
    # updated to disallow numbers from the wordcloud
    no_white = re.sub(r'[^A-Za-z\s]', '', data)
    w_c = dict(Counter(word_tokenize(no_white)))
    frequency = []
    for k, v in w_c.items():
        frequency.append({"text": k, "size": v * 3})
    frequency = [f for f in frequency if f['size'] > 3 and f['text'].lower() not in stop_set]
    return json.dumps(frequency)
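
For reference, a minimal standalone sketch (with a hypothetical document body standing in for the Elasticsearch hit) of the text/size shape this endpoint emits for the word cloud:

import json
from collections import Counter

# Hypothetical stand-in for the 'body' field fetched from Elasticsearch.
body = "embassy telegram telegram telegram embassy cable"
counts = Counter(body.split())

# Same shaping rule as wc(): scale counts by 3, keep entries with size > 3.
frequency = [{"text": k, "size": v * 3} for k, v in counts.items() if v * 3 > 3]
print(json.dumps(frequency))
# [{"text": "embassy", "size": 6}, {"text": "telegram", "size": 9}]
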
Example No. 2
def geo_endpoint():
    last_query = session.get('last_query', None)
    if last_query is None:
        return json.dumps([])
    query = last_query['query']

    url = '{}/_search'.format(es_path)

    loc_q = {"size": 30000, "filter": {"exists": {"field": "locations"}}}

    q = {"query": {"query_string": {"query": query}}}
    r = es.search(body=q,
                  index=es_index,
                  fields=["entities", "title", "file", "entities"],
                  size=100000)
    data = r
    locations = []
    # for hit in data['hits']['hits']:
    #   print(hit['fields']['file'][0])
    #   print
    #   for location in geodict_lib.find_locations_in_text(re.sub('\s', ' ', hit['_source']['file'])):
    #       for token in location['found_tokens']:
    #       locations.append({'lat':token['lat'],'lon':token['lon'],'name':token['matched_string']})

    # geo = map(lambda x: x['found_tokens'])
    # return json.dumps(locations)
    # print('Number of Hits: ' + str(len(data['hits']['hits'])))

    for hit in data['hits']['hits']:
        entity_locations = []

        entities = json.loads(hit['fields']['entities'][0])

        try:
            for ent in entities:
                if ent['category'] == 'locations':
                    entity_locations.append(ent)
        except:
            locs = []

        try:
            doc_file = str(hit['fields']['file'][0].replace('\n', '<br>'))
        except:
            continue

        try:
            for location in entity_locations:
                locations.append({
                    'lat': location['entity']['lat'],
                    'lon': location['entity']['lon'],
                    'name': location['entity']['placename'],
                    'title': hit['fields']['title'],
                    'file': doc_file
                })
        except:
            continue
            # print('no locations')

    # geo = map(lambda x: x['found_tokens'])
    return json.dumps(locations)
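
The bare except: blocks above silently swallow malformed entities payloads. A possible tightening, assuming the same JSON-encoded list-of-dicts shape, is a small helper:

import json

def location_entities(raw):
    # Return the 'locations' entities from a JSON-encoded entity list,
    # or an empty list when the payload is missing or malformed.
    try:
        entities = json.loads(raw)
    except (TypeError, ValueError):
        return []
    return [e for e in entities
            if isinstance(e, dict) and e.get('category') == 'locations']
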
Example No. 3
def more_like_this(doc_id):
    """
    Returns similar documents
    :param doc_id:
    :return:
    """
    q = {
        "query": {
            "more_like_this": {
                "docs": [{
                    "_index": "dossiers",
                    "_type": "attachment",
                    "_id": doc_id
                }]
            }
        }
    }

    response = es.search(body=q, index=es_index, fields=['title'], size=10)
    results = {'results': []}
    try:
        for r in response['hits']['hits']:
            results['results'].append({
                'id': r['_id'],
                'name': r['fields']['title'][0]
            })
    except (KeyError, IndexError):
        pass

    return jsonify(results)
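
Note that the docs array and _type are Elasticsearch 1.x conventions; on 2.x and later the same request is usually written with like (a hedged sketch, same index and a made-up id assumed):

doc_id = "1"  # hypothetical document id
q_modern = {
    "query": {
        "more_like_this": {
            # "docs" was renamed to "like" in ES 2.x; "_type" disappears in 7.x+.
            "like": [{
                "_index": "dossiers",
                "_id": doc_id
            }]
        }
    }
}
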
Example No. 4
def dps_top_brands(kwargs):
    granularity = chg_granularity(kwargs['granularity'])

    if kwargs['start_date'] > kwargs['end_date']:
        return "wrong end_date"

    qry_field = "paired_brand.keyword"

    date_field = "pairing_created_on"

    cardinal_field = "paired_device_id"

    if kwargs['mno'] == 'all':
        qry_dsl = dps_query_without_mno(granularity, kwargs['trend_qty'],
                                        qry_field, date_field, cardinal_field,
                                        kwargs)
    else:
        match_para = "operator_name"
        qry_dsl = dps_query_with_mno(granularity, kwargs['trend_qty'],
                                     qry_field, date_field, match_para,
                                     cardinal_field, kwargs)

    qry = es.search(index=conf['dps_index'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
Example No. 5
def wc(query):
    stopset = set(stopwords.words('english'))
    url = 'http://localhost:9200/dossiers/_search'
    q = {
        "fields": ["file"],
        "query": {
            "term": {"file": query}
        }
    }
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    data = r
    frequency = []
    documents = []
    for hit in data['hits']['hits']:
        text = hit['fields']['file'][0]
        nowhite = re.sub(r'\s', ' ', text)
        nowhite = re.sub(r'[^\w\s]', '', nowhite)
        wt = word_tokenize(nowhite)
        documents.append(wt)

    docflat = [item for sublist in documents for item in sublist]
    wc = dict(Counter(docflat))
    for k, v in wc.items():
        frequency.append({"text": k, "size": v * 3})
    frequency = [f for f in frequency if f['size'] > 6 and f['text'].lower() not in stopset]
    return json.dumps(frequency)
Example No. 6
def url_fetch(query=""):
    if not query:
        last_query = session.get('last_query', None)
        if last_query is not None:
            query = session['last_query']['query']
    stopset = set(stopwords.words('english'))
    q = {"fields": ["file"], "query": {"term": {"file": query}}}
    r = es.search(body=q, index=es_index)
    data = r['hits']['hits']
    urls = []
    pn = []
    for doc in data:
        urls.append(re.findall(r'(https?://[^\s]+)', doc['fields']['file'][0]))
        try:
            for match in phonenumbers.PhoneNumberMatcher(
                    doc['fields']['file'][0], region=None):
                pn.append({
                    'number':
                    phonenumbers.format_number(
                        match.number, phonenumbers.PhoneNumberFormat.E164),
                    'location':
                    geocoder.description_for_number(match.number, "en")
                })
        except KeyError:
            pass
    urls = filter(lambda x: x != [], urls)
    # urls_flat=reduce(lambda x,y: x.extend(y),urls)
    urls_flat = [item for sublist in urls for item in sublist]
    return json.dumps({'urls': dict(Counter(urls_flat)), 'pn': pn})
Example No. 7
    def imeis_status(reg_status, qry_field, aggs_size):

        qry_dsl = {
            "aggs": {
                "aggs_1": {
                    "terms": {
                        "field": qry_field,
                        "size": aggs_size,
                        "order": {
                            "_count": "desc"
                        }
                    }
                }
            },
            "size": 0,
            "query": {
                "terms": {
                    qry_field: reg_status
                }
            }
        }

        qry = es.search(index=conf['drs_reg_index'],
                        body=qry_dsl,
                        request_timeout=conf['request_timeout'])

        return qry['aggregations']
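
A hedged usage sketch, assuming imeis_status is exposed as a plain function and conf/es are wired up as in the snippet; each terms bucket pairs a status value with its document count:

aggs = imeis_status(["Approved", "Rejected"], "registration_status.keyword", 10)
for bucket in aggs['aggs_1']['buckets']:
    print(bucket['key'], bucket['doc_count'])
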
Example No. 8
def wc(query):
    stop_words = set(stopwords.words('english'))
    # generating a corpus specific stopword list
    stopset_state_specific = set(['review', 'na', 'declassifiedreleased', 'review', 'unclassified', 'confidential',
                                  'secret', 'disposition', 'released', 'approved', 'document', 'classification',
                                  'restrictions', 'state', 'department', 'date', 'eo', 'handling'])
    stopset = stop_words.union(stopset_state_specific)
    url = 'http://localhost:9200/dossiers/_search'
    q = {
        "fields": ["file", "body"],  # added body to query
        "query": {
            "match": {
                "file": query
            }
        }
    }
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    # switched to return 'body' instead of 'file', which is the portion of the 'file' that has been regex'd by the uploader
    # to include the most relevant information (e.g. excluding headers)
    data = r['hits']['hits'][0]['fields']['body'][0]
    # nowhite = re.sub(r'\s', ' ', data)
    # updated to disallow numbers from the wordcloud
    nowhite = re.sub(r'[^A-Za-z\s]', '', data)
    wt = word_tokenize(nowhite)
    wc = dict(Counter(wt))
    frequency = []
    for k, v in wc.items():
        frequency.append({"text": k, "size": v * 3})
    frequency = [f for f in frequency if f['size'] > 3 and f['text'].lower() not in stopset]
    return json.dumps(frequency)
Example No. 9
def search_endpoint(query=None, page=None, box_only=False):
    if not query and not page:
        last_query = session.get("last_query", None)
        if last_query:
            query, page = last_query["query"], last_query["page"]
        else:
            # better error
            return abort(404)

    if not page:
        page = 1

    session["last_query"] = {"query": query, "page": page, "ids": []}
    session["history"] = amend_history(session.get("history", list()), session["last_query"])

    # convert pages to records for ES (from is zero-based, ten hits per page)
    start = (int(page) - 1) * 10

    q = {
        "fields": ["title", "highlight", "entities", "owner"],
        "from": start,
        "query": {"match": {"file": query}},
        "highlight": {"fields": {"file": {}}, "pre_tags": ["<span class='highlight'>"], "post_tags": ["</span>"]},
    }
    raw_response = es.search(body=q, index=DEFAULT_INDEX, df="file", size=10)

    hits = []

    for resp in raw_response["hits"]["hits"]:
        # Store returned ids
        session["last_query"]["ids"].append(resp["_id"])

        if is_owner(resp["fields"]["owner"][0]):
            # Flatten structure for individual hits
            hits.append(
                {
                    "id": resp["_id"],
                    "title": resp["fields"]["title"][0],
                    "highlight": resp["highlight"]["file"][0],
                    "permissions": True,
                }
            )
        else:
            hits.append({"id": resp["_id"], "title": resp["fields"]["title"][0], "permissions": False})

    results = {
        "hits": hits,
        "took": float(raw_response["took"]) / 1000,
        "total": "{:,}".format(raw_response["hits"]["total"]),
        "total_int": int(raw_response["hits"]["total"]),
        "query": query,
        "from": int(page),
    }

    if box_only:
        return render_template("search-results-box.html", results=results)

    return render_template("search-template.html", results=results, history=session["history"])
Example No. 10
def more_like_this(doc_id):
    ''' Returns similar documents '''
    q = {
        "fields": ["title"],
        "query": {
            "more_like_this": {
                "docs": [{
                    "_index": "dossiers",
                    "_type": "attachment",
                    "_id": doc_id
                }]
            }
        },
        "size": 10
    }

    response = es.search(body=q, index=DEFAULT_INDEX)
    results = {'results': []}
    try:
        for r in response['hits']['hits']:
            results['results'].append({
                'id': r['_id'],
                'name': r['fields']['title'][0]
            })
    except (KeyError, IndexError):
        pass

    return jsonify(results)
Example No. 11
def search_results(query):
    page = int(request.args.get('page', 1))
    per_page = app.config['PER_PAGE']

    body = search_body(query)
    response = es.search(
            index=app.config['ES_INDEX'],
            # doc_type='',
            from_=(page-1)*per_page,
            size=per_page,
            body=body
            )
    search_results = response['hits']['hits']
    results_count = response['hits']['total']

    pagination = Pagination(
            css_framework=app.config['CSS_FRAMEWORK'],
            page=page,
            total=results_count,
            per_page=per_page)
    return render_template('search_results.html',
            query=query,
            search_results=search_results,
            pagination=pagination,
            count=results_count)
Example No. 12
    def csv(dsl, start, size):
        search_results = es.search(index='suborders',
                                   doc_type='suborders',
                                   scroll='1m',
                                   body=dsl,
                                   _source=[
                                       'order_number', 'suborder_number',
                                       'date_received', 'order_type',
                                       'customer', 'metadata'
                                   ],
                                   size=size,
                                   from_=start)

        sid = search_results['_scroll_id']
        scroll_size = search_results['hits']['total']

        scroll_results = search_results['hits']['hits']

        while scroll_size > 0:
            results = es.scroll(scroll='1m',
                                body={
                                    "scroll": "1m",
                                    "scroll_id": sid
                                })

            scroll_size = len(results['hits']['hits'])

            scroll_results += results['hits']['hits']

        return scroll_results
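
Two quirks above: scroll_size is seeded from the hit total rather than a page length, and the _scroll_id is never refreshed from later responses. A sketch of the more conventional loop, assuming the same es client:

def scroll_all(dsl, size=1000):
    resp = es.search(index='suborders', scroll='1m', body=dsl, size=size)
    sid = resp['_scroll_id']
    hits = resp['hits']['hits']
    while True:
        page = es.scroll(scroll_id=sid, scroll='1m')
        sid = page['_scroll_id']  # the scroll id can change between pages
        if not page['hits']['hits']:
            break
        hits += page['hits']['hits']
    es.clear_scroll(scroll_id=sid)  # free the server-side scroll context
    return hits
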
Example No. 13
    def incident_type_case_status(kwargs, qry_field, aggs_size,
                                  cardinal_field):
        qry_dsl = {
            "aggs": {
                "aggs_1": {
                    "terms": {
                        "field": qry_field,
                        "size": aggs_size,
                        "order": {
                            "_count": "desc"
                        }
                    },
                    "aggs": {
                        "unique_devices": {
                            "cardinality": {
                                "field":
                                cardinal_field,
                                "precision_threshold":
                                kwargs['precision_threshold']
                            }
                        }
                    }
                }
            },
            "size": 0
        }

        qry = es.search(index=conf['lsds_index'],
                        body=qry_dsl,
                        request_timeout=conf['request_timeout'])

        return qry['aggregations']['aggs_1']['buckets']
Example No. 14
    def core_total_imeis(kwargs, field3, qry_index):

        qry_dsl = {
            "aggs": {
                "unique_imeis": {
                    "cardinality": {
                        "field": "imei_norm.keyword",
                        "precision_threshold": kwargs['precision_threshold']
                    }
                }
            },
            "size": 0,
            "query": {
                "bool": {
                    "must": [{
                        "exists": {
                            "field": field3
                        }
                    }]
                }
            }
        }

        qry = es.search(index=qry_index,
                        body=qry_dsl,
                        request_timeout=conf['request_timeout'])

        return qry['aggregations']['unique_imeis']['value']
Example No. 15
def more_like_this(doc_id):
    """
    Returns similar documents
    :param doc_id:
    :return:
    """
    q = {
        "query": {
            "more_like_this": {
                "docs": [
                    {
                        "_index": "dossiers",
                        "_type": "attachment",
                        "_id": doc_id
                    }]
            }
        }
    }

    response = es.search(body=q,
                         index=es_index,
                         fields=['title'],
                         size=10)
    results = {'results': []}
    try:
        for r in response['hits']['hits']:
            results['results'].append({
                'id': r['_id'],
                'name': r['fields']['title'][0]
            })
    except (KeyError, IndexError):
        pass

    return jsonify(results)
Example No. 16
def url_fetch(query=""):
    #query="http"
    if not query:
        query=session['last_query']['query']
    stopset=set(stopwords.words('english'))
    url='http://localhost:9200/dossiers/_search'
    q = {
        "fields" : ["file"],
        "query" : {
            "term" : { "file" : query }
            }
        }
    #r=requests.post(url,data=json.dumps(q))    
    r=es.search(body=q,index=DEFAULT_INDEX)
    data=r['hits']['hits']
    urls=[]
    pn=[]
    for doc in data:
        urls.append(re.findall(r'(https?://[^\s]+)', doc['fields']['file'][0]))
        try:
            for match in phonenumbers.PhoneNumberMatcher(doc['fields']['file'][0], region=None):
                    pn.append({'number':phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164),'location':geocoder.description_for_number(match.number,"en")})     
        except KeyError:
            pass
    urls=filter(lambda x: x!=[],urls)
    #urls_flat=reduce(lambda x,y: x.extend(y),urls)
    urls_flat=[item for sublist in urls for item in sublist]
    return json.dumps({'urls':dict(Counter(urls_flat)), 'pn':pn})
Example No. 17
def wc(query):
    stop_words = set(stopwords.words('english'))
    # generating a corpus specific stopword list
    stopset_state_specific = {
        'review', 'na', 'declassifiedreleased', 'review', 'unclassified',
        'confidential', 'secret', 'disposition', 'released', 'approved',
        'document', 'classification', 'restrictions', 'state', 'department',
        'date', 'eo', 'handling'
    }
    stop_set = stop_words.union(stopset_state_specific)
    q = {"query": {"match": {"file": query}}}
    r = es.search(body=q, index=es_index, fields=["file", "body"])
    # switched to return 'body' instead of 'file':
    # 'body' is the portion of the 'file' that has been regex'd by the uploader
    # to include the most relevant information (e.g. excluding headers)
    data = r['hits']['hits'][0]['fields']['body'][0]
    # no_white = re.sub('\s', ' ', data)
    # updated to disallow numbers from the wordcloud
    no_white = re.sub(r'[^A-Za-z\s]', '', data)
    w_c = dict(Counter(word_tokenize(no_white)))
    frequency = []
    for k, v in w_c.items():
        frequency.append({"text": k, "size": v * 3})
    frequency = [
        f for f in frequency
        if f['size'] > 3 and f['text'].lower() not in stop_set
    ]
    return json.dumps(frequency)
Example No. 18
def es_search():
    # max_results = request.args.get('max_results', 10)
    # max_results = int(max_results)
    #
    # query = request.args.get('query')
    # entities = term.Term.query.filter(term.Term.id.ilike('%{}%'.format(query))).all()
    #
    # if not entities:
    #     abort(404)
    #
    # # return json.dumps([entity.to_dict() for entity in entities[:max_results]])
    # return jsonify({'results': [entity.to_dict() for entity in entities[:max_results]]})

    # remote_addr = request.environ.get('HTTP_X_REAL_IP', request.remote_addr)

    app.logger.info('{} - {}'.format(request.remote_addr, request.url))

    query = request.args.get('q')

    results = es.search(index=app.config.get('INDEX_NAME'), q=query)

    hits = results['hits']['hits']

    if not hits:
        abort(404)

    return jsonify({'results': hits})
Example No. 19
    def device_count(kwargs):

        qry_dsl = {
            "aggs": {
                "unique_devices": {
                    "cardinality": {
                        "field": "device_id",
                        "precision_threshold": kwargs['precision_threshold']
                    }
                }
            },
            "size": 0,
            "query": {
                "bool": {
                    "must": [{
                        "match": {
                            "registration_status": "Approved"
                        }
                    }]
                }
            }
        }

        qry = es.search(index=conf['drs_reg_index'],
                        body=qry_dsl,
                        request_timeout=conf['request_timeout'])

        return qry['aggregations']
Example No. 20
def query(index, query):
    if not es:
        return []
    body = {"query": {"multi_match": {"query": query, "fields": ["*"]}}}
    search = es.search(index=index, body=body)
    results = [int(result["_id"]) for result in search["hits"]["hits"]]
    return results
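
A hypothetical call, assuming an index of posts keyed by integer ids; the list comes back ordered by relevance:

ids = query('posts', 'elasticsearch scroll api')
print(ids)  # e.g. [3, 17, 42]
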
Example No. 21
def core_registration_status_details(kwargs):

    if kwargs['start_date'] > kwargs['end_date']:
        return "wrong end_date"

    qry_dsl = {
        "aggs": {
            "registration_status": {
                "terms": {
                    "field": "status.keyword",
                    "size": 9
                },
                "aggs": {
                    "unique_brands": {
                        "cardinality": {
                            "field": "brand_name.keyword",
                            "precision_threshold":
                            kwargs['precision_threshold']
                        }
                    },
                    "unique_models": {
                        "cardinality": {
                            "field": "model.keyword",
                            "precision_threshold":
                            kwargs['precision_threshold']
                        }
                    }
                }
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "start_date": {
                            "gte": kwargs['start_date'],
                            "lte": kwargs['end_date']
                        }
                    }
                }, {
                    "exists": {
                        "field": "status.keyword"
                    }
                }],
                "must_not": [{
                    "exists": {
                        "field": "end_date"
                    }
                }]
            }
        }
    }

    qry = es.search(index=conf['core_indices']['core_reglist_index'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
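
A hedged sketch of consuming the response: one bucket per registration status, each carrying unique-brand and unique-model cardinality estimates (the dates and threshold are made-up arguments):

resp = core_registration_status_details({'start_date': '2020-01-01',
                                         'end_date': '2020-12-31',
                                         'precision_threshold': 1000})
for b in resp['aggregations']['registration_status']['buckets']:
    print(b['key'], b['doc_count'],
          b['unique_brands']['value'], b['unique_models']['value'])
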
Example No. 22
def viz_endpoint(query):
    url = "http://localhost:9200/dossiers/_search"
    q = {"_source": ["entity"], "fields": ["entities", "title"], "query": {"term": {"file": query}}, "size": 100}
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    data = r
    # graph = make_graph(data)
    graph = document_graph(data["hits"]["hits"])
    return json.dumps(graph)
Example No. 23
def query_index(index, query, page, per_page):
    if not es:
        return [], 0
    search = es.search(
        index=index,
        body={'query': {'multi_match': {'query': query, 'fields': ['*']}},
              'from': 0, 'size': 99})
    ids = [int(hit['_id']) for hit in search['hits']['hits']]
    return ids, search['hits']['total']['value']
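
A common follow-up (hedged sketch, assuming SQLAlchemy and a hypothetical Post model): fetch the matching rows and keep Elasticsearch's relevance order with a CASE expression.

from sqlalchemy import case

def fetch_ranked(search_term, page, per_page):
    ids, total = query_index('posts', search_term, page, per_page)
    if not ids:
        return [], 0
    # Order rows by their position in the ids list returned by Elasticsearch.
    ranking = case({id_: pos for pos, id_ in enumerate(ids)}, value=Post.id)
    return Post.query.filter(Post.id.in_(ids)).order_by(ranking).all(), total
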
Example No. 24
def autocomplete_cities():
    """Autocomplete for cities."""
    query = request.args.get("query")

    redis_key = f"autocomplete_cities|{query}"

    # Try to find with Redis.
    try:
        result = redis_store.get(redis_key)
        redis_is_connected = True
        if result:
            return jsonify(suggestions=pickle.loads(result))
    except RedisConnectionError:
        redis_is_connected = False

    # Try to find with Elasticsearch.
    try:
        cities = es.search(
            index="airtickets-city-index",
            from_=0,
            size=10,
            doc_type="CityName",
            body={
                "query": {
                    "bool": {
                        "must": {
                            "match_phrase_prefix": {
                                "value": {
                                    "query": query
                                }
                            }
                        }
                    }
                },
                "sort": {
                    "population": {
                        "order": "desc"
                    }
                },
            },
        )
        result = [city["_source"] for city in cities["hits"]["hits"]]
    except (ElasticConnectionError, NotFoundError, AttributeError):
        # Try to find with PostgreSQL.
        cities = (CityName.query.join(
            City.city).filter(CityName.name.like(query + "%")).distinct(
                City.population,
                CityName.city_id).order_by(City.population.desc(),
                                           CityName.city_id).limit(10).all())

        result = [city.autocomplete_serialize() for city in cities]

    if redis_is_connected:
        redis_store.set(redis_key, pickle.dumps(result), 86400)

    return jsonify(suggestions=result)
Example No. 25
    def imei_distribution(kwargs):

        qry_dsl = {
            "aggs": {
                "block_cond": {
                    "terms": {
                        "field": "cond_name.keyword",
                        "size": conf['num_core_blk_conds'],
                        "order": {
                            "_count": "desc"
                        }
                    }
                }
            },
            "size": 0,
            "query": {
                "bool": {
                    "must": [
                        {
                            "match_all": {}
                        },
                        {
                            "range": {
                                "block_date": {
                                    "gte": kwargs['start_date'],
                                    "lte": kwargs['end_date']
                                }
                            }
                        },
                        {
                            "exists": {
                                "field": "block_date"
                            }
                        },
                        {
                            "exists": {
                                "field": "start_date"
                            }
                        }

                    ],
                    "must_not": [
                        {
                            "exists": {
                                "field": "end_date"
                            }
                        }
                    ]
                }
            }
        }

        qry = es.search(index=conf['core_indices']['core_classification_data'], body=qry_dsl,
                        request_timeout=conf['request_timeout'])

        return qry
Example No. 26
def viz_all():
    q = {"query": {"match_all": {}}}
    r = es.search(body=q,
                  index=es_index,
                  fields=["entities", "title"],
                  size=100)
    data = r['hits']['hits']
    graph = document_graph(data)

    return json.dumps(graph)
Example No. 27
def request_doc(doc_id):
    q = {
        "query": {
            "match": {
                "_id": doc_id
            }
        },
    }
    response = es.search(body=q, index=DEFAULT_INDEX)
    return response
Example No. 28
def search_query():
    search = request.args['query']
    field = request.args.get('fields', default="")

    try:

        p = field.split(',')

        dict_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "query_string": {
                                "query": search,
                                "fields": p,
                            }
                        },
                    ],
                    "should": [
                        {
                            "multi_match": {
                                "query": search,
                                "type": "most_fields",
                                "fields": p
                            }
                        },
                    ],
                }
            }
        }

        result = es.search(index="dictionary",
                           doc_type="words",
                           body=dict_query)

        res = result["hits"]["hits"]

        filtered_result = []
        for x in res:
            fina = {}
            fin = x['_source']
            fina['word'] = fin['word']
            fina['meaning'] = fin['meaning']
            fina['synonym'] = fin['synonym']
            fina['antonym'] = fin['antonym']
            fina['word_origin'] = fin['word_origin']
            fina['example'] = fin['example']

            filtered_result.append(fina)
        return jsonify({"msg": "success", "response": filtered_result})

    except Exception as e:
        print(str(e))
        return jsonify({"msg": "error occurred", "error": str(e)})
Example No. 29
def lsds_01_total_reported_devices(kwargs):

    granularity = chg_granularity(kwargs['granularity'])

    if kwargs['start_date'] > kwargs['end_date']:
        return "wrong end_date"

    qry_dsl = {
        "aggs": {
            "time_range": {
                "date_histogram": {
                    "field": "case_reported_date",
                    "interval": granularity
                },
                "aggs": {
                    "unique_devices": {
                        "cardinality": {
                            "field": "case_id",
                            "precision_threshold": conf['precision_threshold']
                        }
                    }
                }
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "case_reported_date": {
                            "gte": kwargs['start_date'],
                            "lte": kwargs['end_date']
                        }
                    }
                }],
                "filter": [{
                    "bool": {
                        "should": [{
                            "exists": {
                                "field": "reported_imeis.keyword"
                            }
                        }],
                        "minimum_should_match": 1
                    }
                }]
            }
        }
    }

    qry = es.search(index=conf['lsds_index'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
Example No. 30
def bulk_search(queries):
    data = tablib.Dataset(headers=['filename', 'id', 'query'])
    for q in queries:
        r = es.search(q=q, fields=['title'], size=100, index="dossiers",
                doc_type="attachment")
        for res in r['hits']['hits']:
            title = res['fields']['title'][0]
            _id = res['_id']
            
            data.append((title, _id, q))

    return data
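
Hypothetical usage: a tablib Dataset exports straight to delimited text, so the collected hits can be dumped to disk.

data = bulk_search(["kissinger", "detente"])  # made-up queries
with open('bulk_hits.csv', 'w') as f:
    f.write(data.export('csv'))
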
Example No. 31
def viz_all():
    q = {
        "fields": ["entities", "title"],
        "query": {
            "match_all": {}
        },
        "size": 100
    }
    r = es.search(body=q, index=DEFAULT_INDEX)
    graph = document_graph(r['hits']['hits'])

    return json.dumps(graph)
Example No. 32
def search_results(query):
    res = es.search(index="microblog", doc_type="post", body={"query": {"match": { "body": query }}})

    post_ids = []
    for hit in res['hits']['hits']:
        app.logger.debug(hit)
        post_ids.append(hit['_source']['id'])
    app.logger.debug(post_ids)
    posts = g.user.followed_posts().filter(Post.id.in_(post_ids))
    return render_template('search_results.html', 
                           query=query,
                           results=posts)
Example No. 33
def view_search(key):
    res = es.search(index=index, doc_type=_type, body={"query": {"match": {"name": key}}})
    print(res)
    if res['hits']['hits']:
        for item in res['hits']['hits']:
            e = Employee(name=item['_source']['name'], job=item['_source']['Job'])
            e.save()
        print("done")

        return jsonify({'name': item['_source']['name'], 'Designation': item['_source']['Job'], 'store': 'mongo'})
    else:
        return Response('Record Not Found')
Example No. 34
def history_query():
    """ AND query over all active history terms """
    terms = active_history_terms(session["history"])
    body = {
        "_source": ["entity"],
        "fields": ["entities", "title"],
        "query": {"constant_score": {"filter": {"terms": {"file": terms, "execution": "and"}}}},
    }
    r = es.search(body=body, index=DEFAULT_INDEX, size=100)
    graph = make_response(json.dumps(document_graph(r["hits"]["hits"])))

    return graph
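
"execution": "and" is an Elasticsearch 1.x terms-filter option; on later versions the same all-terms-must-match intent is usually spelled as a bool filter with one term clause per value (hedged sketch, same field name assumed):

terms = ["kissinger", "cable"]  # stand-in for active_history_terms(...)
body_modern = {
    "_source": ["entity"],
    "query": {
        "bool": {
            # AND semantics: every term must match the 'file' field.
            "filter": [{"term": {"file": t}} for t in terms]
        }
    }
}
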
Example No. 35
 def search(dsl, start, size):
     search_results = es.search(index='suborders',
                                doc_type='suborders',
                                body=dsl,
                                _source=[
                                    'order_number', 'suborder_number',
                                    'date_received', 'order_type',
                                    'current_status', 'customer'
                                ],
                                size=size,
                                from_=start)
     return search_results
Example No. 36
def drs_registered_imeis_approved(kwargs):

    granularity = chg_granularity(kwargs['granularity'])

    if kwargs['start_date'] > kwargs['end_date']:
        return "wrong end_date"

    qry_dsl = {
        "aggs": {
            "time_range": {
                "date_histogram": {
                    "field": "registration_date",
                    "interval": granularity
                }
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "match_all": {}
                }, {
                    "match": {
                        "registration_status.keyword": "Approved"
                    }
                }, {
                    "range": {
                        "registration_date": {
                            "gte": kwargs['start_date'],
                            "lte": kwargs['end_date']
                        }
                    }
                }],
                "filter": [{
                    "bool": {
                        "should": [{
                            "exists": {
                                "field": "imeis.keyword"
                            }
                        }],
                        "minimum_should_match": 1
                    }
                }]
            }
        }
    }

    qry = es.search(index=conf['drs_reg_index'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
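
On Elasticsearch 7.x+ the bare interval key is deprecated; the equivalent histogram is written with calendar_interval or fixed_interval (hedged sketch, same field assumed):

aggs_modern = {
    "time_range": {
        "date_histogram": {
            "field": "registration_date",
            "calendar_interval": "month"  # or fixed_interval, e.g. "30d"
        }
    }
}
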
Example No. 37
 def print(dsl, start, size):
     search_results = es.search(index='suborders',
                                doc_type='suborders',
                                body=dsl,
                                _source=[
                                    'suborder_number', 'order_type',
                                    'order_number', 'date_submitted',
                                    'customer', 'metadata',
                                    'multiple_items', 'order_types'
                                ],
                                size=size,
                                from_=start)
     return search_results
Example No. 38
def drs_single_importer_status(kwargs):

    if kwargs['start_date'] > kwargs['end_date']:
        return "wrong end_date"

    qry_dsl = {
        "aggs": {
            "aggs_1": {
                "terms": {
                    "field": "registration_status.keyword",
                    "size": 15,
                    "order": {
                        "_count": "desc"
                    }
                }
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "match": {
                        "registered_user": kwargs['importer_name']
                    }
                }, {
                    "range": {
                        "registration_date": {
                            "gte": kwargs['start_date'],
                            "lte": kwargs['end_date']
                        }
                    }
                }],
                "filter": {
                    "bool": {
                        "should": [{
                            "exists": {
                                "field": "registration_status"
                            }
                        }],
                        "minimum_should_match": 1
                    }
                }
            }
        }
    }

    qry = es.search(index=conf['drs_reg_index'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
Example No. 39
def match_all():
    try:
        query = {"query": {"match_all": {}}}
        result = es.search(index="dict_word", doc_type="word", body=query)

        res = result["hits"]["hits"]
        filtered_result = []
        for i in res:
            filtered_result.append(i["_source"])

        return jsonify({"response": "success", "data": filtered_result})
    except Exception as e:
        print(str(e))
        return jsonify({"response": "failure", "error": str(e)})
Example No. 40
def request_doc(doc_id):
    """
    Searches elastic index for a document matching a particular ID.
    :param str doc_id: A specific document ID
    :return: results of elastic search matching doc_id
    """
    q = {
        "query": {
            "match": {
                "_id": doc_id
            }
        },
    }
    return es.search(body=q, index=es_index)
Example No. 42
def viz_all():
    q = {
        "query": {
            "match_all": {}
        }
    }
    r = es.search(body=q,
                  index=es_index,
                  fields=["entities", "title"],
                  size=100)
    data = r['hits']['hits']
    graph = document_graph(data)

    return json.dumps(graph)
Example No. 43
def match_all():
    try:
        dict_query = {"query": {"match_all": {}}}
        result = es.search(index="dictionary",
                           doc_type="words",
                           body=dict_query)
        res = result["hits"]["hits"]
        filtered_result = []
        for x in res:
            fin = x['_source']
            filtered_result.append(fin)
        return jsonify({"msg": "success", "response": filtered_result})
    except Exception as e:
        print(str(e))
        return jsonify({"msg": "error_occurred", "error": str(e)})
Example No. 44
def more_like_this(doc_id):
    """ Returns similar documents """
    q = {
        "fields": ["title"],
        "query": {"more_like_this": {"docs": [{"_index": "dossiers", "_type": "attachment", "_id": doc_id}]}},
        "size": 10,
    }

    response = es.search(body=q, index=DEFAULT_INDEX)
    results = {"results": []}
    try:
        for r in response["hits"]["hits"]:
            results["results"].append({"id": r["_id"], "name": r["fields"]["title"][0]})
    except (KeyError, IndexError):
        pass

    return jsonify(results)
Example No. 45
def stolen_imeis_on_network(kwargs):

    qry_dsl = {
        "aggs": {
            "MNOs": {
                "terms": {
                    "field": "mno_operator.keyword",
                    "size": conf['num_of_mnos'],
                    "order": {
                        "_count": "desc"
                    }
                }
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "filter": [{
                    "match_all": {}
                }, {
                    "match": {
                        "triplet_year": kwargs['trend_year']
                    }
                }, {
                    "match": {
                        "triplet_month": kwargs['trend_month']
                    }
                }, {
                    "bool": {
                        "should": [{
                            "match_phrase": {
                                "blacklist_reasons.keyword":
                                "{\"IMEI in local stolen list\"}"
                            }
                        }],
                        "minimum_should_match":
                        1
                    }
                }]
            }
        }
    }

    qry = es.search(index=conf['core_indices']['join_core_mno-blacklist'],
                    body=qry_dsl,
                    request_timeout=conf['request_timeout'])

    return qry
Example No. 46
def search_results(query):
    res = es.search(index="microblog",
                    doc_type="post",
                    body={"query": {
                        "match": {
                            "body": query
                        }
                    }})

    post_ids = []
    for hit in res['hits']['hits']:
        app.logger.debug(hit)
        post_ids.append(hit['_source']['id'])
    app.logger.debug(post_ids)
    posts = g.user.followed_posts().filter(Post.id.in_(post_ids))
    return render_template('search_results.html', query=query, results=posts)
Example No. 47
def es_search2():
    if not app.config.get('IS_ES_INDEX'):
        return 'Sorry, you need enable Elasticsearch first.'

    app.logger.info('{} - {}'.format(request.remote_addr, request.url))
    query = request.args.get('q')
    results = es.search(index=app.config.get('ES_INDEX_NAME'),
                        doc_type=app.config.get('ES_TYPE_NAME'),
                        q=query)
    hits = results['hits']['hits']

    entries = []
    for hit in hits:
        entries.append(Entry.get(Entry.id == hit['_id']))

    return render_template('search.jinja2', entries=entries, search=query)
Example No. 48
def get_order():
    results = []
    date = request.get_json()
    res = es.search(index="orders",
                    body={"query": {
                        "term": {
                            "date": f"{date['date']}"
                        }
                    }})

    for hit in res['hits']['hits']:
        result = {}
        src = hit['_source']
        order_date = src['date']
        order = Mongo.get_doc_by_id('orders', src['id_order'])

        for part in order:
            order_id_shop = part['id_shop']
            order_id_stock = part['id_stock']
            order_goods = part['goods']

        stock_info = Postgres.return_stock_info(order_id_stock)
        order_stock_title = stock_info[0]
        order_stock_address = stock_info[1]

        shop_info = Mongo.get_doc_by_part('shops', order_id_shop)

        for info in shop_info:
            order_shop_title = info['title']
            order_shop_address = info['address']

        result.update({'Дата заказа': order_date})              # "Order date"
        result.update({'Название магазина': order_shop_title})  # "Shop name"
        result.update({'Адрес магазина': order_shop_address})   # "Shop address"
        result.update({'Название склада': order_stock_title})   # "Warehouse name"
        result.update({'Aдрес склада': order_stock_address})    # "Warehouse address"
        result.update({'Товары': {}})                           # "Goods"

        for goods in order_goods:
            result['Товары'].update({goods: order_goods[goods]})

        results.append(result)

    for i in results:
        print(i)

    return '.'
Example No. 49
def wc(query):
    stopset = set(stopwords.words("english"))
    url = "http://localhost:9200/dossiers/_search"
    q = {"fields": ["file"], "query": {"term": {"file": query}}}
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    data = r["hits"]["hits"][0]["fields"]["file"][0]

    nowhite = re.sub("\s", " ", data)
    nowhite = re.sub(r"[^\w\s]", "", data)
    wt = word_tokenize(nowhite)
    wc = dict(Counter(wt))
    frequency = []
    for k, v in wc.iteritems():
        frequency.append(dict({"text": k, "size": v * 3}))
    frequency = filter(lambda x: x["size"] > 3 and x["text"].lower() not in stopset, frequency)
    return json.dumps(frequency)
Example No. 50
def viz_endpoint(query):
    # url = '{}/_search'.format(es_path)
    q = {
        "_source": ["entity"],
        "fields": ["entities", "title"],
        "query": {
            "match": {
                "file": query
            }
        },
        "size": 150
    }

    r = es.search(body=q, index=es_index)
    data = r['hits']['hits']
    graph = document_graph(data)
    return json.dumps(graph)
Example No. 51
def bulk_search(queries):
    """

    :param list queries: List of elasticsearch queries
    :return tablib.Dataset:
    """
    data = tablib.Dataset(headers=['filename', 'id', 'query'])
    for q in queries:
        r = es.search(q=q, fields=['title'], size=100, index=es_index,
                      doc_type="attachment")
        for res in r['hits']['hits']:
            title = res['fields']['title'][0]
            _id = res['_id']
            
            data.append((title, _id, q))

    return data
Example No. 52
def geo_endpoint():
    query = session['last_query']['query']
    url = 'http://localhost:9200/dossiers/_search'
    q = {
        "fields": ["file"],
        "query": {
            "term": {"file": query}
        }
    }
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    data = r
    locations = []
    for hit in data['hits']['hits']:
        for location in geodict_lib.find_locations_in_text(re.sub(r'\s', ' ', str(hit['fields']['file']))):
            for token in location['found_tokens']:
                locations.append({'lat': token['lat'], 'lon': token['lon'], 'name': token['matched_string']})

    # geo=map(lambda x: x['found_tokens'])
    return json.dumps(locations)
Example No. 53
def history_query():
    """
    AND query over all active history terms
    """
    terms = active_history_terms(session['history'])
    body = {
        "_source": ["entity"],
        "fields": ["entities", "title"],
        "query": {
            "constant_score": {
                "filter": {
                    "terms": {
                        "file": terms,
                        "execution": "and"
                    }
                }
            }
        }
    }
    r = es.search(body=body, index=es_index, size=100)
    data = r['hits']['hits']
    graph = make_response(json.dumps(document_graph(data)))

    return graph
Example No. 54
def serve_timeline(query=None, page=None, box_only=True, dates={}):

    if request.method == "POST":
        json_dict = request.get_json()
        # print(json_dict)
        # print(type(json_dict))
        dates = json_dict['dates']

    startdate = dates[0][0:10]
    enddate = dates[1][0:10]

    if startdate == enddate:
        startdate = "1973-01-01"
        enddate = "1974-01-01"
    # print(startdate, enddate)

    # print('running a new query...')

    if not query and not page:
        last_query = session.get('last_query', None)
        if last_query:
            query, page = last_query['query'], last_query['page']
        else:
            # better error
            return abort(404)

    if not page:
        page = 1

    session['last_query'] = {'query': query, 'page': page, 'ids': []}
    # convert pages to records for ES (from is zero-based, ten hits per page)
    start = (int(page) - 1) * 10

    q = {
        "fields": ["title", "highlight", "entities", "owner", "date"],
        "from": start,
        "query": {
            "match": {
                "file": query
            }
        },
        "filter": {
            "range": {
                "date": {
                    "gte": startdate,
                    "lte": enddate,
                    "format": "yyyy-MM-dd"
                }
            }
        },
        "highlight": {
            "fields": {"file": {}},
            "pre_tags": ["<span class='highlight'>"],
            "post_tags": ["</span>"]
        }
    }

    raw_response = es.search(body=q, index=DEFAULT_INDEX,
            df="file",
            size=10)

    #print q
    #print raw_response

    hits = []

    for resp in raw_response['hits']['hits']:

        # Store returned ids
        session['last_query']['ids'].append(resp['_id'])

        if is_owner(resp['fields']['owner'][0]):
            # Flatten structure for individual hits
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'highlight': resp['highlight']['file'][0],
                'permissions': True
                })
        else:
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'permissions': False
                })



    results = {
            'hits': hits,
            'took': float(raw_response['took'])/1000,
            'total': "{:,}".format(raw_response['hits']['total']),
            'total_int': int(raw_response['hits']['total']),
            'query': query,
            'from': int(page)
            }

    if box_only:
        return render_template('search-results-box.html', results=results)

    return render_template('search-template.html', results=results)
Example No. 55
def timeline_new(query=None, page=None, box_only=False):
    if not query and not page:
        last_query = session.get('last_query', None)
        if last_query:
            query, page = last_query['query'], last_query['page']
        else:
            # better error
            return abort(404)

    if not page:
        page = 1

    session['last_query'] = {'query': query, 'page': page, 'ids': []}
    # convert pages to records for ES (from is zero-based, ten hits per page)
    start = (int(page) - 1) * 10



    q_daterange = {
        "aggs": {
            "max_date": {"max": {"field": "date"}},
            "min_date": {"min": {"field": "date"}}
        }
    }

    response = es.search(body=q_daterange, index=DEFAULT_INDEX)

    print(response['aggregations']['min_date'])
    print(response['aggregations']['max_date'])

    min_date_datetime = round_month_down(datetime.datetime.strptime(
        response['aggregations']['min_date']['value_as_string'], "%Y-%m-%dT%H:%M:%S.%fZ"))
    max_date_datetime = round_month_up(datetime.datetime.strptime(
        response['aggregations']['max_date']['value_as_string'], "%Y-%m-%dT%H:%M:%S.%fZ"))
    min_date = min_date_datetime.strftime("%Y-%m-%d")
    max_date = max_date_datetime.strftime("%Y-%m-%d")
    time_delta = week_delta(min_date_datetime, max_date_datetime)
    rng = pd.date_range(min_date, periods=time_delta, freq='w')
    rng = rng.tolist()
    rng = [date + datetime.timedelta(days=1) for date in rng]
    rng = [date.strftime("%Y-%m-%d") for date in rng]
    rngframe = pd.DataFrame(index=rng)

    timeline_minimum = min_date_datetime - datetime.timedelta(days=7)
    timeline_minimum = timeline_minimum.strftime("%Y-%m-%d")

    print(min_date)
    print(max_date)


    q = {
        "fields": ["title", "highlight", "entities", "owner", "date"],
        "from": start,
        "query": {
            "match": {
                "file": query
            }
        },
        "highlight": {
            "fields": {"file": {}},
            "pre_tags": ["<span class='highlight'>"],
            "post_tags": ["</span>"]
        },
        "aggs": {
            "articles_over_time": {
                "date_histogram": {
                    "field": "date",
                    "interval": "week"
                }
            },
            "max_date": {"max": {"field": "date"}},
            "min_date": {"min": {"field": "date"}}
        }
    }


    response = es.search(body=q, index=DEFAULT_INDEX)

    print(response['aggregations']['articles_over_time']['buckets'])

    df = pd.DataFrame(response['aggregations']['articles_over_time']['buckets'])
    df['Date'] = df.key_as_string.apply(lambda x: str(x[:10]))
    df.columns = ['Count', 'key', 'key_as_string', 'Date']
    df = df.drop(['key', 'key_as_string'], axis=1)
    df = df.set_index('Date')

    output = rngframe.join(df, how="left")
    output = output.fillna(0)
    output = output.reset_index()
    output.columns = ['Date', 'Count']

    date_count_json = output.to_json(orient='records')

    out = {'date_data': date_count_json, 'time_min': timeline_minimum}

    print(json.dumps(out))

    return json.dumps(out)
Example No. 56
def geo_endpoint():
    query = session['last_query']['query']
    # print('Query: ' + query)
    url = 'http://localhost:9200/dossiers/_search'

    loc_q = {
        "size": 30000,
        "filter": {
            "exists": {"field": "locations"}
        }
    }

    q = {
        "size": 100000,
        "fields": ["entities", "title", "file"],
        "query": {
            "query_string": {"query": query}
        }
    }
    # r=requests.post(url,data=json.dumps(q))
    r = es.search(body=q, index=DEFAULT_INDEX)
    data = r
    locations = []
    # for hit in data['hits']['hits']:
    #     print(hit['fields']['file'][0])
    #     for location in geodict_lib.find_locations_in_text(re.sub(r'\s', ' ', hit['_source']['file'])):
    #         for token in location['found_tokens']:
    #             locations.append({'lat': token['lat'], 'lon': token['lon'], 'name': token['matched_string']})

    # geo = map(lambda x: x['found_tokens'])
    # return json.dumps(locations)
    # print('Number of Hits: ' + str(len(data['hits']['hits'])))

    for hit in data['hits']['hits']:
        entity_locations = []

        entities = json.loads(hit['fields']['entities'][0])

        try:
            for ent in entities:
                if ent['category'] == 'locations':
                    entity_locations.append(ent)
        except:
            locs = []

        try:
            doc_file = str(hit['fields']['file'][0].replace('\n', '<br>'))
        except:
            continue

        try:
            for location in entity_locations:
                locations.append({'lat': location['entity']['lat'],
                                  'lon': location['entity']['lon'],
                                  'name': location['entity']['placename'],
                                  'title': hit['fields']['title'],
                                  'file': doc_file})
        except:
            continue
            # print('no locations')

    # geo = map(lambda x: x['found_tokens'])
    return json.dumps(locations)
Example No. 57
def serve_geo_new(query=None, page=None, box_only=True, bounds={}):

    if request.method == "POST":
        json_dict = request.get_json()
        print(json_dict)
        print(type(json_dict))
        try:
            bounds = json_dict['bounds']['bounds']
            southwest_lat = bounds['southwest_lat']
            southwest_lon = bounds['southwest_lon']
            northeast_lat = bounds['northeast_lat']
            northeast_lon = bounds['northeast_lon']
        except (KeyError, TypeError):
            southwest_lat = -84
            southwest_lon = -170
            northeast_lat = 85
            northeast_lon = 189

    print('running a new query...')

    if not query and not page:
        last_query = session.get('last_query', None)
        if last_query:
            query, page = last_query['query'], last_query['page']
        else:
            # better error
            return abort(404)

    if not page:
        page = 1

    session['last_query'] = {'query': query, 'page': page, 'ids': []}
    # convert pages to records for ES (from is zero-based, ten hits per page)
    start = (int(page) - 1) * 10

    q = {
        "fields": ["title", "highlight", "entities", "owner", "body"],
        "from": start,
        "query": {
            "filtered": {
                "query": {
                    "match": {
                        "file": query
                    }
                },
                "filter": {
                    "geo_bounding_box": {
                        "locs": {
                            "top_left": {
                                "lat": northeast_lat,  # top_lat
                                "lon": southwest_lon   # top_lon
                            },
                            "bottom_right": {
                                "lat": southwest_lat,  # bottom_lat
                                "lon": northeast_lon   # bottom_lon
                            }
                        }
                    }
                }
            }
        },
        "highlight": {
            "fields": {"file": {}},
            "pre_tags": ["<span class='highlight'>"],
            "post_tags": ["</span>"]
        }
    }


    raw_response = es.search(body=q, index=DEFAULT_INDEX,
            df="file",
            size=10)

    hits = []

    for resp in raw_response['hits']['hits']:
        # Store returned ids
        session['last_query']['ids'].append(resp['_id'])

        text = resp['fields']['body'][0]
        text = re.sub('\\n\\n', '\\n', text)
        text = re.sub('\\n', '<br>', text)

        if is_owner(resp['fields']['owner'][0]):
            # Flatten structure for individual hits
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'highlight': resp['highlight']['file'][0],
                'permissions': True,
                'body': text
                })
        else:
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'permissions': False
                })


    results = {
            'hits': hits,
            'took': float(raw_response['took'])/1000,
            'total': "{:,}".format(raw_response['hits']['total']),
            'total_int': int(raw_response['hits']['total']),
            'query': query,
            'from': int(page)
            }

    if box_only:
        return render_template('search-results-map.html', results=results)

    return render_template('search-template.html', results=results)
Example No. 58
def serve_clusters(query=None,page=None, box_only=True, dates={},documents={}):
    if request.method == "POST":
        json_dict = request.get_json()
    
    if not query and not page:
        last_query = session.get('last_query', None)
        if last_query:
            query, page = last_query['query'], last_query['page']
        else:
            # better error
            return abort(404)

    if not page:
        page = 1

    q = {
          "query": {

            "bool": {
              "must": [
                {
                  "match": {
                    "file": query
                  }
                },
                {
                  "terms": {
                    "_id":json_dict['documents']
                  }
                }
              ]
            }
          },
          "fields": ["title", "highlight", "entities", "owner", "date"],
          "highlight": {
            "fields": {
              "file": {
                "number_of_fragments": 1,
                "pre_tags" : ["<span class='highlight'>"],
                "post_tags" : ["</span>"]
              }
            }
          }
        }

    raw_response = es.search(body=q, index=DEFAULT_INDEX,
            df="file",
            size=10)

    hits = []

    for resp in raw_response['hits']['hits']:

        # Store returned ids
        session['last_query']['ids'].append(resp['_id'])

        if is_owner(resp['fields']['owner'][0]):
            # Flatten structure for individual hits
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'highlight': resp['highlight']['file'][0],
                'permissions': True
                })
        else:
            hits.append({'id': resp['_id'],
                'title': resp['fields']['title'][0],
                'permissions': False
                })


    results = {
            'hits': hits,
            'took': float(raw_response['took'])/1000,
            'total': "{:,}".format(raw_response['hits']['total']),
            'total_int': int(raw_response['hits']['total']),
            'query': query,
            'from': int(page)
            }

    if box_only:
        return render_template('search-results-box.html', results=results)

    return render_template('search-template.html', results=results)
Example No. 59
def view_elastic_data():
    res = es.search(index=index, doc_type=_type, body={"query": {"match_all": {}}})
    return jsonify(res)
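
Worth remembering when reading this last one: search returns only 10 hits by default, so this match_all shows at most 10 documents unless a size is passed (hedged sketch, same module-level index and _type assumed):

res = es.search(index=index, doc_type=_type,
                body={"query": {"match_all": {}}, "size": 100})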