Ejemplo n.º 1
0
def analyze():
	"""Render the 'analyze.html' dashboard page.

	Reads ``last_update`` from document 1 of the 'appdata' index, then runs
	five term queries (bookmarked documents and objectives 1 to 4) against
	the 'projects' and 'publications' indices through ``query.run_query``.
	The executed responses are handed to the template together with a form
	data dict in which every field is -1.
	"""
	last_update = client.get(index='appdata', doc_type='doc', id=1)['_source']['last_update']

	def first_thousand(term_query):
		# Slice to the first 1000 hits before executing the search.
		search = query.run_query(term_query, index=['projects', 'publications'])
		return search[:1000].execute()

	# Same key order and execution order as a literal listing of all five.
	content = {'bookmarked': first_thousand(Q("term", bookmarked=True))}
	for number in range(1, 5):
		content[f'obj{number}'] = first_thousand(Q("term", objectives=f"objective{number}"))

	form_fields = (
		'type', 'query', 'index', 'topic',
		'element', 'status', 'date_range', 'sort_by',
	)
	formdata = dict.fromkeys(form_fields, -1)

	return render_template(
		'analyze.html',
		title='Analyze',
		heading='Dashboard',
		last_update=last_update,
		content=content,
		formdata=formdata,
	)
Ejemplo n.º 2
0
def explore():
	"""Render 'explore.html' with one result set per entry in ``topics``.

	Filter settings are read from the submitted form (with defaults). For
	every topic the topic's query is built via ``query.get_query_arguments``
	and ``query.Query``, run against the selected index with the filters, and
	limited to the first 100 hits. The template also receives the form values,
	the button states and the ``last_update`` value stored in document 1 of
	the 'appdata' index.
	"""
	# get and handle form data
	form = request.form
	search_type = form.get('type', 'search')
	search_query = form.get('query')
	index = form.get('index', 'projects')
	filter_topic = form.get('topic', 'all')
	filter_element = form.get('element', 'all')
	filter_status = form.get('status', 'all')
	date_range = form.get('dateRange', '50')
	sort_by = form.get('sortBy', 'date')
	# doc_type is the index name without its last character
	doc_type = index[:-1]

	filters = {
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
		'doc_type': doc_type,
	}

	content = {}
	for topic in topics:
		# build the topic query, run it and keep the first 100 hits
		topic_query = query.Query(**query.get_query_arguments(topic))
		search = query.run_query(topic_query.query, index=index, filters=filters)
		content[topic] = search[:100].execute()

	formdata = {
		'type': search_type,
		'query': search_query,
		'index': index,
		'topic': filter_topic,
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
	}

	# the topic button state is the literal string "None" on this page
	buttonStates = {
		'topic': "None",
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
		'doc_type': doc_type,
	}

	last_update = client.get(index='appdata', doc_type='doc', id=1)['_source']['last_update']
	return render_template(
		'explore.html',
		content=content,
		buttonStates=buttonStates,
		formdata=formdata,
		heading='Explore',
		title='Explore',
		last_update=last_update,
	)
Ejemplo n.º 3
0
def more_like_this():
	"""Return, as JSON, the top five hits of a ``more_like_this`` query.

	The reference document is identified by the ``index`` (default
	'projects') and ``doc_id`` form fields; similarity is computed on the
	``title`` and ``abstract`` fields.
	"""
	form = request.form
	index = form.get('index', 'projects')
	doc_id = form.get('doc_id')

	# the document the results should resemble
	reference_doc = {"_index": index, "_type": "doc", "_id": doc_id}
	similar = Q({
		"more_like_this": {
			"fields": ["title", "abstract"],
			"like": [reference_doc],
		}
	})

	response = query.run_query(similar, index=index)[:5].execute()
	return jsonify(response.hits.hits)
Ejemplo n.º 4
0
def project_count_by_topic(**kwargs):
    """Count the documents matching a topic query and collect their ids.

    Keyword arguments read: ``topic`` (the topic whose query is run),
    ``topic_selection`` and ``element`` (passed on as the ``topic`` and
    ``element`` filters). The search runs against the module-level ``index``.

    Returns:
        A ``(count, doc_ids)`` tuple: the result of ``count()`` on the search
        and the keys of a terms aggregation on ``_id`` (at most 5000 buckets).
    """
    filters = dict(element=kwargs.get("element"),
                   topic=kwargs.get("topic_selection"))

    # run query
    topic_args = query.get_query_arguments(kwargs.get("topic"))
    topic_query = query.Query(**topic_args)
    search = query.run_query(topic_query.query, index=index, filters=filters)
    count = search.count()

    # aggregate doc ids and execute
    search.aggs.bucket('doc_ids', A("terms", field="_id", size=5000))
    response = search.execute()

    doc_ids = [bucket['key']
               for bucket in response.aggregations.doc_ids.buckets]
    return count, doc_ids
Ejemplo n.º 5
0
def publication_count(queries=None):
    """Return the number of documents in the 'publications' index.

    Args:
        queries: optional mapping. When truthy, its ``tag`` and
            ``element_tag`` entries are used as the ``topic`` and ``element``
            filters of a ``match_all`` search. When falsy, documents whose
            ``doc_type`` matches the phrase "publication" are counted.

    Returns:
        The value returned by ``count()`` on the resulting search.
    """
    # search object
    search = Search(using=client, index='publications')

    if not queries:
        only_publications = Q(
            {"match_phrase": {"doc_type": {"query": "publication"}}})
        return search.query(only_publications).count()

    filters = dict(topic=queries.get("tag"),
                   element=queries.get("element_tag"))
    filtered = query.run_query(Q({"match_all": {}}),
                               index='publications',
                               filters=filters)
    return filtered.count()
Ejemplo n.º 6
0
def project_count_by_state(queries=None):
    """Group documents of the module-level ``index`` by funding-agency state.

    Args:
        queries: optional mapping. When truthy, its ``tag`` and
            ``element_tag`` entries are used as the ``topic`` and ``element``
            filters of a ``match_all`` search; otherwise a plain search over
            the index is used.

    Returns:
        A dict mapping each state bucket key to
        ``dict(doc_count=..., doc_ids=[...])``, taken from a nested
        aggregation on ``funding_agencies`` (up to 50 states, ordered by
        descending count, each with up to 5000 ``_id`` buckets).
    """
    # search object
    search = Search(using=client, index=index)

    if queries:
        filters = dict(topic=queries.get("tag"),
                       element=queries.get("element_tag"))
        search = query.run_query(Q({"match_all": {}}),
                                 index=index,
                                 filters=filters)

    # aggregations
    nested_agencies = A("nested", path="funding_agencies")
    states_terms = A("terms",
                     field="funding_agencies.state.keyword",
                     size=50,
                     order={"_count": "desc"})
    ids_terms = A("terms", field="_id", size=5000)

    # nest the aggregations: agencies > states > doc_ids
    agencies_agg = search.aggs.bucket('agencies', nested_agencies)
    states_agg = agencies_agg.bucket('states', states_terms)
    states_agg.bucket('doc_ids', ids_terms)
    response = search.execute()

    # one entry per state bucket
    return {
        bucket['key']: dict(
            doc_count=bucket['doc_count'],
            doc_ids=[doc['key'] for doc in bucket.doc_ids.buckets],
        )
        for bucket in response.aggregations.agencies.states.buckets
    }
Ejemplo n.º 7
0
def project_count(queries=None):
    """Return the total document count plus a count per project status.

    Args:
        queries: optional mapping. When truthy, its ``tag`` and
            ``element_tag`` entries are used as the ``topic`` and ``element``
            filters of a ``match_all`` search and each status is applied as a
            ``match`` filter on ``status``. When falsy, documents whose
            ``doc_type`` matches the phrase "project" are searched and each
            status is applied as a ``match_phrase`` query on
            ``status.keyword``.

    Returns:
        A dict with the keys ``total``, ``active``, ``completed``,
        ``programmed`` and ``proposed``.
    """
    # search object
    search = Search(using=client, index=index)

    status_names = ['Active', 'Completed', 'Programmed', 'Proposed']

    if queries:
        tag = queries.get("tag")
        element_tag = queries.get("element_tag")
        filters = dict(element=element_tag, topic=tag)
        search = query.run_query(Q({"match_all": {}}),
                                 index=index,
                                 filters=filters)

        def narrowed(status):
            return search.filter("match", status=status)
    else:
        projects_only = Q({"match_phrase": {"doc_type": {"query": "project"}}})
        search = search.query(projects_only)

        def narrowed(status):
            status_query = Q(
                {"match_phrase": {"status.keyword": {"query": status}}})
            return search.query(status_query)

    # total first, then one count per status, in the listed order
    res = {'total': search.count()}
    for status in status_names:
        res[status.lower()] = narrowed(status).count()

    return res
Ejemplo n.º 8
0
def tag_documents(index_name, topic_tags, element_tags):
    """Reset and re-apply topic and element tags on the documents of an index.

    First, every document returned by a scrolled ``match_all`` search over
    ``index_name`` has its ``tags`` and ``element_tags`` replaced by empty
    lists. Then, for each tag, the tag's query is run against ``index_name``
    and the tag is added to every matching document: to ``tags`` for
    ``topic_tags`` and to ``element_tags`` for ``element_tags``.

    Args:
        index_name: name of the index to process; documents are loaded as
            ``models.Project`` for 'projects' and ``models.Publication`` for
            'publications'.
        topic_tags: iterable of tags stored in each document's ``tags``.
        element_tags: iterable of tags stored in each document's
            ``element_tags``.

    Raises:
        ValueError: if a document lives in an index other than 'projects' or
            'publications'. Previously this surfaced as UnboundLocalError on
            the first document, or silently re-updated the previously loaded
            document on later ones.
    """
    def load_doc(doc_index, doc_id):
        # Pick the model class that corresponds to the index.
        if doc_index == 'projects':
            return models.Project.get(using=client, index=doc_index, id=doc_id)
        if doc_index == 'publications':
            return models.Publication.get(using=client,
                                          index=doc_index,
                                          id=doc_id)
        raise ValueError(f'unsupported index: {doc_index!r}')

    def process_hits(hits):
        # Clear both tag fields on every hit of one scroll batch.
        for item in hits:
            doc_id = item['_id']
            doc_index = item["_index"]
            doc = load_doc(doc_index, doc_id)
            doc.update(using=client,
                       index=doc_index,
                       request_timeout=20,
                       tags=list(),
                       element_tags=list())
            print(f'{doc_index} - doc ({doc_id}): tags removed')

    def remove_tags():
        # Init scroll by search
        data = client.search(index=index_name,
                             doc_type='doc',
                             scroll='2m',
                             size=1000,
                             body={"query": {
                                 "match_all": {}
                             }})
        sid = data['_scroll_id']
        try:
            # An empty batch marks the end of the scroll.
            while data['hits']['hits']:
                process_hits(data['hits']['hits'])
                data = client.scroll(scroll_id=sid, scroll='2m')
                sid = data['_scroll_id']
        finally:
            # Release the server-side scroll context instead of leaving it
            # open until the 2m timeout; a context that is already gone
            # (404) is not an error.
            client.clear_scroll(body={'scroll_id': [sid]}, ignore=(404,))

    def apply_tags(tags, field):
        # Add each tag to ``field`` of every document its query matches.
        for tag in tags:
            kwargs = query.get_query_arguments(tag)
            q = query.Query(**kwargs)
            s = query.run_query(q.query, index=index_name)
            hits, _ = query.process_search_response(s, last=s.count())
            for doc_id in hits:
                doc = load_doc(index_name, doc_id)
                current_tags = list(getattr(doc, field) or [])
                current_tags.append(tag)
                # De-duplicate while keeping a stable, first-seen order.
                unique_tags = list(dict.fromkeys(current_tags))
                doc.update(using=client,
                           index=index_name,
                           **{field: unique_tags})
                print(f'{index_name} - doc ({doc_id}): updated with {tag}')

    # first remove all tags
    remove_tags()

    # topic tags
    apply_tags(topic_tags, 'tags')

    # element tags
    apply_tags(element_tags, 'element_tags')
Ejemplo n.º 9
0
def results():
	"""Run the search described by the request and render 'results.html'.

	Parameters are read from the query string on GET and from the form on a
	POST whose ``form`` field equals 'filters'. ``type`` selects the search:

	* 'click_count' - topic/element selection from the dashboard counts
	* 'click_bar'   - the topic in ``query`` (a clicked bar) plus filters
	* 'click_map'   - nested match of ``query`` on ``funding_agencies.state``
	* 'search'      - ``multi_match`` of ``query`` on title and abstract

	The first 1000 hits are passed to the template together with a
	human-readable description of the selection (``clicked``), the button
	states, the form data and the ``last_update`` value stored in document 1
	of the 'appdata' index.

	Returns:
		The rendered template, or a redirect: to this view when the referrer
		page is 'update', and to 'explore' when the request does not describe
		a search that can be run.
	"""
	# request.referrer is None when no Referer header was sent; fall back to
	# an empty string so the page-name check cannot raise AttributeError.
	referrer_page = (request.referrer or '').split('/')[-1]
	if referrer_page == 'update':
		return redirect(url_for('results'))

	# format for front end display
	formatstr = lambda s: s.replace("_"," ")

	if request.method == 'GET':
		# retrieve get requests
		params = request.args
	elif request.method == 'POST' and request.form['form'] == 'filters':
		# retrieve form submission
		params = request.form
	else:
		# No recognised parameter source: the search parameters would all be
		# undefined below, so send the user to the explore page instead.
		return redirect(url_for('explore'))

	search_type = params.get('type','search')
	search_query = params.get('query')
	index = params.get('index','projects')
	filter_topic = params.get('topic','all')
	filter_element = params.get('element','all')
	filter_status = params.get('status','all')
	date_range = params.get('dateRange','50')
	sort_by = params.get('sortBy','date')
	# doc_type is the index name without its last character
	doc_type = index[:-1]

	# every search type passes the same filters
	filters = dict(
		topic=filter_topic,
		element=filter_element,
		doc_type=doc_type,
		status=filter_status,
		date_range=date_range,
		sort_by=sort_by
	)

	has_topic = filter_topic != 'all'
	has_element = filter_element != 'all'
	topic_label = formatstr(filter_topic)
	element_label = formatstr(filter_element)

	def tag_query(tag):
		# build the stored query that belongs to a topic or element tag
		return query.Query(**query.get_query_arguments(tag)).query

	# handle requests
	if search_type == 'click_count':
		# if user clicked project or pub count in dashboard
		if has_topic and has_element:
			clicked = f'for "{topic_label}" and "{element_label}"'
		elif has_topic:
			clicked = f'for "{topic_label}"'
		elif has_element:
			clicked = f'for "{element_label}"'
		else:
			clicked = f"for all {index}"

		# the topic query wins over the element query; with neither selected
		# everything matches (note: sorting does not apply to match all)
		if has_topic:
			q = tag_query(filter_topic)
		elif has_element:
			q = tag_query(filter_element)
		else:
			q = Q({"match_all": {}})

	elif search_type == 'click_bar':
		# if user clicked on bar chart
		query_label = formatstr(search_query)
		if search_query == filter_topic:
			# the clicked bar is the topic already selected in the dashboard
			if has_element:
				clicked = f'for "{query_label}" and "{element_label}"'
			else:
				clicked = f'for "{query_label}"'
		elif has_topic and has_element:
			clicked = f'for "{query_label}", "{topic_label}", and "{element_label}"'
		elif has_topic:
			clicked = f'for "{query_label}", and "{topic_label}"'
		elif has_element:
			clicked = f'for "{query_label}", and "{element_label}"'
		else:
			clicked = f'for "{query_label}"'

		q = tag_query(search_query)

	elif search_type == 'click_map':
		# if user clicked state on map
		if has_topic and has_element:
			clicked = f'for "{search_query}", "{topic_label}", and "{element_label}"'
		elif has_topic:
			clicked = f'for "{search_query}", and "{topic_label}"'
		elif has_element:
			clicked = f'for "{search_query}", and "{element_label}"'
		else:
			clicked = f'for "{search_query}"'

		q = Q({"nested" : {
					"path" : "funding_agencies",
					"query" : {
						"bool" : {
							"must" : [
								{ "match" : {"funding_agencies.state" : search_query} }
							]
						}
					}
				}
			}
		)

	elif search_type == 'search' and search_query != 'None':
		# if a free search was requested by the user
		# NOTE(review): the literal string 'None' presumably comes from a
		# template rendering a missing query; a query that is absent
		# altogether (Python None) still reaches multi_match. Confirm.
		if has_topic and has_element:
			clicked = f'for "{search_query}", "{topic_label}" and "{element_label}"'
		elif has_topic:
			clicked = f'for "{search_query}", and "{topic_label}"'
		elif has_element:
			clicked = f'for "{search_query}", and "{element_label}"'
		else:
			clicked = f'for "{search_query}"'

		q = Q({"multi_match" : {
			"query" : search_query,
			"fields" : [ "title", "abstract" ]
			}
		})

	else:
		# Nothing to search for. This used to redirect only when the referrer
		# was the explore page and otherwise fell through to code that used
		# undefined names; always go back to the explore page.
		return redirect(url_for('explore'))

	s = query.run_query(q, index=index, filters=filters)
	s = s[:1000] # pagination
	r = s.execute()

	buttonStates=dict(
		type = search_type,
		topic = filter_topic,
		element = filter_element,
		status = filter_status,
		date_range = date_range,
		sort_by = sort_by,
		doc_type = doc_type
	)

	formdata = dict(
		type=search_type,
		query=search_query,
		index=index,
		topic=filter_topic,
		element=filter_element,
		status=filter_status,
		date_range=date_range,
		sort_by=sort_by
	)

	last_update = client.get(index='appdata', doc_type='doc', id=1)['_source']['last_update']
	return render_template('results.html',
							title='Results',
							heading='Search Results',
							content=r,
							clicked=clicked,
							buttonStates=buttonStates,
							formdata=formdata,
							last_update=last_update)
Ejemplo n.º 10
0
def funding_by_state(**kwargs):
    """Return funding-range buckets per funding-agency state.

    Keyword arguments read: ``topic`` and ``element``, passed as filters to a
    ``match_all`` search over the module-level ``index``.

    Returns:
        A dict mapping each state key of at most two characters to the keyed
        ``funding`` range buckets (as a plain dict) of the documents funded
        by agencies in that state. Only the first bucket seen for a state is
        kept.
    """
    filters = dict(element=kwargs.get("element"), topic=kwargs.get("topic"))

    # run query
    search = query.run_query(Q({"match_all": {}}),
                             index=index,
                             filters=filters)

    # consecutive funding brackets, closed by an open-ended top bracket
    bounds = [0, 100000, 250000, 500000, 750000, 1000000]
    funding_ranges = [{"from": low, "to": high}
                      for low, high in zip(bounds, bounds[1:])]
    funding_ranges.append({"from": bounds[-1]})

    # aggregations
    nested_agencies = A("nested", path="funding_agencies")
    states_terms = A(
        "terms",
        field="funding_agencies.state.keyword",
        size=50,
        order={"_count": "desc"},
    )
    back_to_docs = A("reverse_nested")
    funding_brackets = A("range",
                         field="funding",
                         ranges=funding_ranges,
                         keyed=True)

    # nest the aggregations: agencies > states > reverse > fund_amt
    agg = search.aggs.bucket('agencies', nested_agencies)
    agg = agg.bucket('states', states_terms)
    agg = agg.bucket('reverse', back_to_docs)
    agg.bucket('fund_amt', funding_brackets)
    response = search.execute()

    # keep the first bucket of every state key of at most two characters
    res = {}
    for bucket in response.aggregations.agencies.states.buckets:
        state = bucket.key
        if len(state) > 2 or state in res:
            continue
        res[state] = bucket.reverse.fund_amt.buckets.to_dict()

    return res