Exemple #1
0
def explore():
	"""Render the explore page with the first 100 hits of every topic query.

	Search settings are read from the submitted form data, falling back to
	defaults when a field is missing.  For each entry of ``topics`` (a name
	defined outside this function) the topic's query is run against the
	selected index with the current filters, and the executed response is
	stored under that topic.

	Returns:
		The rendered ``explore.html`` template.
	"""
	form = request.form

	# search settings submitted by the user (or their defaults)
	search_type = form.get('type', 'search')
	search_query = form.get('query')
	index = form.get('index', 'projects')
	filter_topic = form.get('topic', 'all')
	filter_element = form.get('element', 'all')
	filter_status = form.get('status', 'all')
	date_range = form.get('dateRange', '50')
	sort_by = form.get('sortBy', 'date')

	# the document type is the index name without its last character
	doc_type = index[:-1]

	filters = {
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
		'doc_type': doc_type,
	}

	def fetch_topic(topic):
		# build the topic's query, run it and keep the first 100 hits
		topic_query = query.Query(**query.get_query_arguments(topic))
		search = query.run_query(topic_query.query, index=index, filters=filters)
		return search[:100].execute()

	content = {topic: fetch_topic(topic) for topic in topics}

	formdata = {
		'type': search_type,
		'query': search_query,
		'index': index,
		'topic': filter_topic,
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
	}

	# no topic button is pre-selected on the explore page
	buttonStates = {
		'topic': "None",
		'element': filter_element,
		'status': filter_status,
		'date_range': date_range,
		'sort_by': sort_by,
		'doc_type': doc_type,
	}

	appdata = client.get(index='appdata', doc_type='doc', id=1)
	last_update = appdata['_source']['last_update']

	return render_template(
		'explore.html',
		content=content,
		buttonStates=buttonStates,
		formdata=formdata,
		heading='Explore',
		title='Explore',
		last_update=last_update,
	)
def project_count_by_topic(**kwargs):
    """Count the documents matching a topic query and collect their ids.

    Keyword Args:
        topic: Passed to ``query.get_query_arguments`` to build the query.
        topic_selection: Value used for the ``topic`` filter.
        element: Value used for the ``element`` filter.

    Returns:
        A ``(count, doc_ids)`` tuple: the result of ``count()`` on the
        search, and the bucket keys of a ``terms`` aggregation on ``_id``
        (requested with ``size=5000``).

    Note:
        ``index`` is not defined inside this function; it is resolved from
        the enclosing scope at call time.
    """
    topic_query = kwargs.get("topic")
    active_filters = {
        "element": kwargs.get("element"),
        "topic": kwargs.get("topic_selection"),
    }

    # build and run the query
    query_args = query.get_query_arguments(topic_query)
    built = query.Query(**query_args)
    search = query.run_query(built.query, index=index, filters=active_filters)
    count = search.count()

    # attach a terms aggregation over the document ids, then execute
    search.aggs.bucket('doc_ids', A("terms", field="_id", size=5000))
    buckets = search.execute().aggregations.doc_ids.buckets

    return count, [bucket['key'] for bucket in buckets]
Exemple #3
0
def tag_documents(index_name, topic_tags, element_tags):
    """Reset and re-apply topic and element tags on the documents of an index.

    Every document returned by a match-all scroll over ``index_name`` first
    gets ``tags`` and ``element_tags`` set to empty lists.  Then, for each
    tag, the query built from ``query.get_query_arguments(tag)`` is run
    against ``index_name`` and the tag is added to each matching document:
    topic tags are stored in ``tags``, element tags in ``element_tags``.
    A progress line is printed for every updated document.

    Args:
        index_name: Index to process (``'projects'`` or ``'publications'``).
        topic_tags: Iterable of tags to store in the ``tags`` field.
        element_tags: Iterable of tags to store in the ``element_tags`` field.

    Raises:
        ValueError: If a document has to be loaded from an index other than
            ``'projects'`` or ``'publications'``.
    """

    def load_document(doc_index, doc_id):
        # Fail loudly on an unsupported index; otherwise the caller would
        # update a stale document left over from a previous iteration (or
        # hit an unbound local on the first one).
        if doc_index == 'projects':
            model = models.Project
        elif doc_index == 'publications':
            model = models.Publication
        else:
            raise ValueError(f'unsupported index: {doc_index!r}')
        return model.get(using=client, index=doc_index, id=doc_id)

    def clear_tags(hits):
        # Empty both tag fields on every hit of one scroll batch.
        for item in hits:
            doc_id = item['_id']
            doc_index = item['_index']
            doc = load_document(doc_index, doc_id)
            # NOTE(review): confirm that Document.update forwards
            # request_timeout to the client instead of treating it as a
            # document field.
            doc.update(using=client,
                       index=doc_index,
                       request_timeout=20,
                       tags=list(),
                       element_tags=list())
            print(f'{doc_index} - doc ({doc_id}): tags removed')

    def remove_tags():
        # Walk the whole index with the scroll API, clearing tags per batch.
        data = client.search(index=index_name,
                             doc_type='doc',
                             scroll='2m',
                             size=1000,
                             body={"query": {
                                 "match_all": {}
                             }})

        # An empty batch marks the end of the scroll.
        while data['hits']['hits']:
            clear_tags(data['hits']['hits'])
            data = client.scroll(scroll_id=data['_scroll_id'], scroll='2m')

    def apply_tags(tags, field_name):
        # Add each tag to ``field_name`` on every document its query matches.
        for tag in tags:
            kwargs = query.get_query_arguments(tag)
            q = query.Query(**kwargs)
            s = query.run_query(q.query, index=index_name)
            hits, _ = query.process_search_response(s, last=s.count())
            for doc_id in hits:
                doc = load_document(index_name, doc_id)

                existing = getattr(doc, field_name)
                merged = set(existing) if existing else set()
                merged.add(tag)  # the set keeps the stored tags unique
                doc.update(using=client,
                           index=index_name,
                           **{field_name: list(merged)})

                print(f'{index_name} - doc ({doc_id}): updated with {tag}')

    # first remove all tags
    remove_tags()

    # then re-apply topic tags and element tags
    apply_tags(topic_tags, 'tags')
    apply_tags(element_tags, 'element_tags')
Exemple #4
0
def results():
	"""Render the results page for a dashboard click or a free-text search.

	Search settings are read from the submitted form when the request is a
	POST whose ``form`` field is ``'filters'``, and from the query string
	otherwise.  The ``type`` setting selects how the query is built:

	* ``click_count`` -- the topic query if a topic is selected, else the
	  element query if an element is selected, else a match-all query.
	* ``click_bar`` -- the query built from ``query`` (the clicked bar).
	* ``click_map`` -- a nested match on ``funding_agencies.state``.
	* ``search`` -- a multi-match of ``query`` on ``title`` and ``abstract``.

	The first 1000 hits are passed to ``results.html`` together with a
	``clicked`` label describing what was searched for.

	Returns:
		The rendered ``results.html`` template, or a redirect: to
		``results`` when the last path segment of the referrer is
		``update``, and to ``explore`` when no query can be built from the
		request.
	"""

	# request.referrer is None when the client sends no Referer header
	referrer_page = (request.referrer or '').split('/')[-1]
	if referrer_page == 'update':
		return redirect(url_for('results'))

	def formatstr(s):
		# format for front end display
		return s.replace("_", " ")

	# Filter form submissions carry their settings in the POST body; every
	# other request (including methods other than GET) uses the query string,
	# so the settings below are always bound.
	if request.method == 'POST' and request.form['form'] == 'filters':
		params = request.form
	else:
		params = request.args

	search_type = params.get('type', 'search')
	search_query = params.get('query')
	index = params.get('index', 'projects')
	filter_topic = params.get('topic', 'all')
	filter_element = params.get('element', 'all')
	filter_status = params.get('status', 'all')
	date_range = params.get('dateRange', '50')
	sort_by = params.get('sortBy', 'date')
	doc_type = index[:-1]

	has_topic = filter_topic != 'all'
	has_element = filter_element != 'all'

	filters = dict(
		topic=filter_topic,
		element=filter_element,
		doc_type=doc_type,
		status=filter_status,
		date_range=date_range,
		sort_by=sort_by
	)

	# handle requests
	if search_type == 'click_count':
	# if user clicked project or pub count in dashboard

		if has_topic and has_element:
			clicked = f'for "{formatstr(filter_topic)}" and "{formatstr(filter_element)}"'
		elif has_topic:
			clicked = f'for "{formatstr(filter_topic)}"'
		elif has_element:
			clicked = f'for "{formatstr(filter_element)}"'
		else:
			clicked = f"for all {index}"

		# a selected topic takes precedence over a selected element
		if has_topic:
			kwargs = query.get_query_arguments(filter_topic)
			q = query.Query(**kwargs)
			s = query.run_query(q.query, index=index, filters=filters)
		elif has_element:
			kwargs = query.get_query_arguments(filter_element)
			q = query.Query(**kwargs)
			s = query.run_query(q.query, index=index, filters=filters)
		else:
			q = Q({"match_all": {}}) # note: sorting does not apply to match all
			s = query.run_query(q, index=index, filters=filters)

	elif search_type == 'click_bar':
	# if user clicked on bar chart

		bar = formatstr(search_query)
		if search_query == filter_topic:
			# the clicked bar is the topic already selected in the dashboard
			if has_element:
				clicked = f'for "{bar}" and "{formatstr(filter_element)}"'
			else:
				clicked = f'for "{bar}"'
		elif has_topic and has_element:
			clicked = f'for "{bar}", "{formatstr(filter_topic)}", and "{formatstr(filter_element)}"'
		elif has_topic:
			clicked = f'for "{bar}", and "{formatstr(filter_topic)}"'
		elif has_element:
			clicked = f'for "{bar}", and "{formatstr(filter_element)}"'
		else:
			clicked = f'for "{bar}"'

		kwargs = query.get_query_arguments(search_query)
		q = query.Query(**kwargs)
		s = query.run_query(q.query, index=index, filters=filters)

	elif search_type == 'click_map':
	# if user clicked state on map

		if has_topic and has_element:
			clicked = f'for "{search_query}", "{formatstr(filter_topic)}", and "{formatstr(filter_element)}"'
		elif has_topic:
			clicked = f'for "{search_query}", and "{formatstr(filter_topic)}"'
		elif has_element:
			clicked = f'for "{search_query}", and "{formatstr(filter_element)}"'
		else:
			clicked = f'for "{search_query}"'

		q = Q({"nested": {
			"path": "funding_agencies",
			"query": {
				"bool": {
					"must": [
						{"match": {"funding_agencies.state": search_query}}
					]
				}
			}
		}})

		s = query.run_query(q, index=index, filters=filters)

	elif search_type == 'search' and search_query != 'None':
	# if a free search was requested by the user
	# NOTE(review): a missing ``query`` parameter arrives here as None, not
	# the string 'None' -- confirm the search backend accepts that.

		if has_topic and has_element:
			clicked = f'for "{search_query}", "{formatstr(filter_topic)}" and "{formatstr(filter_element)}"'
		elif has_topic:
			clicked = f'for "{search_query}", and "{formatstr(filter_topic)}"'
		elif has_element:
			clicked = f'for "{search_query}", and "{formatstr(filter_element)}"'
		else:
			clicked = f'for "{search_query}"'

		q = Q({"multi_match": {
			"query": search_query,
			"fields": ["title", "abstract"]
		}})

		s = query.run_query(q, index=index, filters=filters)

	else:
		# No query can be built from this request; send the user back to
		# the explore page instead of failing on an unbound search below.
		return redirect(url_for('explore'))

	s = s[:1000] # pagination
	r = s.execute()

	buttonStates = dict(
		type=search_type,
		topic=filter_topic,
		element=filter_element,
		status=filter_status,
		date_range=date_range,
		sort_by=sort_by,
		doc_type=doc_type
	)

	formdata = dict(
		type=search_type,
		query=search_query,
		index=index,
		topic=filter_topic,
		element=filter_element,
		status=filter_status,
		date_range=date_range,
		sort_by=sort_by
	)

	last_update = client.get(index='appdata', doc_type='doc', id=1)['_source']['last_update']
	return render_template('results.html',
							title='Results',
							heading='Search Results',
							content=r,
							clicked=clicked,
							buttonStates=buttonStates,
							formdata=formdata,
							last_update=last_update)