def explore():
    """Render the Explore page with one result set per entry in ``topics``.

    The search and filter settings are read from the submitted form; every
    field except ``query`` has a default. For each topic the query arguments
    returned by ``query.get_query_arguments`` are turned into a query, run
    against the selected index with the chosen filters, sliced to ``[:100]``
    and executed.

    Returns:
        The result of ``render_template('explore.html', ...)``.
    """
    form = request.form

    # Submitted search settings and the defaults used when a field is absent.
    search_type = form.get('type', 'search')
    search_query = form.get('query')
    index = form.get('index', 'projects')
    filter_topic = form.get('topic', 'all')
    filter_element = form.get('element', 'all')
    filter_status = form.get('status', 'all')
    date_range = form.get('dateRange', '50')
    sort_by = form.get('sortBy', 'date')

    # The document type is the index name without its last character
    # (e.g. 'projects' -> 'project').
    doc_type = index[:-1]

    filters = {
        'element': filter_element,
        'status': filter_status,
        'date_range': date_range,
        'sort_by': sort_by,
        'doc_type': doc_type,
    }

    def search_topic(topic):
        """Run the query built for *topic* and return the executed response."""
        topic_query = query.Query(**query.get_query_arguments(topic))
        search = query.run_query(topic_query.query, index=index, filters=filters)
        return search[:100].execute()  # pagination

    content = {topic: search_topic(topic) for topic in topics}

    # Echoed back to the template so the form keeps the submitted values.
    formdata = {
        'type': search_type,
        'query': search_query,
        'index': index,
        'topic': filter_topic,
        'element': filter_element,
        'status': filter_status,
        'date_range': date_range,
        'sort_by': sort_by,
    }

    # The topic button state is always the literal string "None" on this page.
    buttonStates = {
        'topic': "None",
        'element': filter_element,
        'status': filter_status,
        'date_range': date_range,
        'sort_by': sort_by,
        'doc_type': doc_type,
    }

    appdata = client.get(index='appdata', doc_type='doc', id=1)
    last_update = appdata['_source']['last_update']

    return render_template(
        'explore.html',
        content=content,
        buttonStates=buttonStates,
        formdata=formdata,
        heading='Explore',
        title='Explore',
        last_update=last_update,
    )
def project_count_by_topic(**kwargs):
    """Count the documents matching a topic query and collect their ids.

    Keyword Args:
        topic: Name passed to ``query.get_query_arguments`` to build the query.
        topic_selection: Value passed to ``query.run_query`` as the ``topic``
            filter.
        element: Value passed to ``query.run_query`` as the ``element`` filter.

    Returns:
        A ``(count, doc_ids)`` tuple: ``count`` is ``s.count()`` for the
        filtered search and ``doc_ids`` is the list of bucket keys from a
        ``terms`` aggregation on ``_id`` (requested with ``size=5000``).
    """
    topic_name = kwargs.get("topic")
    active_filters = {
        "element": kwargs.get("element"),
        "topic": kwargs.get("topic_selection"),
    }

    # run query
    # NOTE(review): `index` is not assigned anywhere in this function, so it
    # presumably resolves to a module-level name -- confirm.
    topic_query = query.Query(**query.get_query_arguments(topic_name))
    search = query.run_query(topic_query.query, index=index, filters=active_filters)
    total = search.count()

    # aggregate doc ids, then execute the search with the aggregation attached
    search.aggs.bucket('doc_ids', A("terms", field="_id", size=5000))
    response = search.execute()

    # keep only the bucket keys
    matching_ids = [bucket['key'] for bucket in response.aggregations.doc_ids.buckets]
    return total, matching_ids
def tag_documents(index_name, topic_tags, element_tags):
    """Reset and re-apply topic and element tags on the documents of an index.

    Every document returned by a ``match_all`` scroll over ``index_name``
    first has ``tags`` and ``element_tags`` set to empty lists. Then, for each
    tag, the query built from ``query.get_query_arguments(tag)`` is run and
    the tag is added to ``tags`` (for ``topic_tags``) or ``element_tags``
    (for ``element_tags``) on every document id that
    ``query.process_search_response`` returns.

    Args:
        index_name: Index to process. Documents are loaded with
            ``models.Project`` for ``'projects'`` and ``models.Publication``
            for ``'publications'``.
        topic_tags: Iterable of tags to store in each matching document's
            ``tags`` field.
        element_tags: Iterable of tags to store in each matching document's
            ``element_tags`` field.

    Raises:
        ValueError: If a document belongs to an index that has no model class.
            Previously this surfaced as an ``UnboundLocalError`` on the first
            hit, or silently re-updated the previously loaded document.
    """
    model_by_index = {
        'projects': models.Project,
        'publications': models.Publication,
    }

    def load_doc(doc_index, doc_id):
        """Fetch one document using the model class registered for its index."""
        model = model_by_index.get(doc_index)
        if model is None:
            raise ValueError(
                f'no document model for index {doc_index!r} (doc {doc_id})'
            )
        return model.get(using=client, index=doc_index, id=doc_id)

    def process_hits(hits):
        """Clear both tag fields on every hit of one search/scroll batch."""
        for item in hits:
            doc_id = item['_id']
            # Use the index reported by the hit itself, not the outer argument.
            doc_index = item['_index']
            doc = load_doc(doc_index, doc_id)
            doc.update(
                using=client,
                index=doc_index,
                request_timeout=20,
                tags=[],
                element_tags=[],
            )
            print(f'{doc_index} - doc ({doc_id}): tags removed')

    def remove_tags(target_index):
        """Scroll through every document of *target_index* and clear its tags."""
        # Init scroll by search
        data = client.search(
            index=target_index,
            doc_type='doc',
            scroll='2m',
            size=1000,
            body={"query": {"match_all": {}}},
        )
        # An empty batch marks the end of the scroll.
        # NOTE(review): the scroll context is never cleared explicitly here;
        # it is left to expire after the '2m' keep-alive.
        while data['hits']['hits']:
            process_hits(data['hits']['hits'])
            data = client.scroll(scroll_id=data['_scroll_id'], scroll='2m')

    def apply_tags(tags, field):
        """Add each tag to *field* on every document its query returns."""
        for tag in tags:
            kwargs = query.get_query_arguments(tag)
            q = query.Query(**kwargs)
            s = query.run_query(q.query, index=index_name)
            hits, _ = query.process_search_response(s, last=s.count())
            for doc_id in hits:
                doc = load_doc(index_name, doc_id)
                existing = getattr(doc, field) or []
                # De-duplicate while keeping a stable order (existing tags
                # first), instead of the arbitrary order of list(set(...)).
                merged = list(dict.fromkeys([*existing, tag]))
                doc.update(using=client, index=index_name, **{field: merged})
                print(f'{index_name} - doc ({doc_id}): updated with {tag}')

    # first remove all tags
    remove_tags(index_name)

    # topic tags
    apply_tags(topic_tags, 'tags')

    # element tags
    apply_tags(element_tags, 'element_tags')
def results():
    """Render the results page for a dashboard click or a free-text search.

    Parameters are read from the submitted form when the request is a POST
    whose ``form`` field equals ``'filters'``, and from the query string
    otherwise. ``type`` selects how the search is built:

    * ``click_count`` -- query for the selected topic, else the selected
      element, else ``match_all``.
    * ``click_bar`` -- query built from ``query`` (the clicked bar).
    * ``click_map`` -- nested match on ``funding_agencies.state``.
    * ``search`` -- ``multi_match`` of ``query`` on ``title`` and ``abstract``.

    The search is sliced to ``[:1000]``, executed and passed to
    ``results.html`` together with a ``clicked`` description of the selection.

    Returns:
        The rendered ``results.html`` template, or a redirect: to ``results``
        when the referrer's last path segment is ``update``, and to
        ``explore`` when there is nothing to search for (unknown ``type``, or
        a free-text search without a query).
    """
    # request.referrer is None when the client sent no Referer header.
    referrer_page = (request.referrer or '').split('/')[-1]
    if referrer_page == 'update':
        return redirect(url_for('results'))

    def formatstr(s):
        """Format an identifier for front end display."""
        return s.replace("_", " ")

    # The filter form is read from the POST body; every other request
    # (GET, HEAD, POSTs from other forms) is read from the query string so
    # the parameters below are always bound.
    if request.method == 'POST' and request.form['form'] == 'filters':
        params = request.form
    else:
        params = request.args

    search_type = params.get('type', 'search')
    search_query = params.get('query')
    index = params.get('index', 'projects')
    filter_topic = params.get('topic', 'all')
    filter_element = params.get('element', 'all')
    filter_status = params.get('status', 'all')
    date_range = params.get('dateRange', '50')
    sort_by = params.get('sortBy', 'date')
    # The document type is the index name without its last character.
    doc_type = index[:-1]

    has_topic = filter_topic != 'all'
    has_element = filter_element != 'all'

    filters = dict(
        topic=filter_topic,
        element=filter_element,
        doc_type=doc_type,
        status=filter_status,
        date_range=date_range,
        sort_by=sort_by,
    )

    def run_named_query(name):
        """Run the query built from the arguments registered for *name*."""
        kwargs = query.get_query_arguments(name)
        q = query.Query(**kwargs)
        return query.run_query(q.query, index=index, filters=filters)

    # handle requests
    if search_type == 'click_count':
        # user clicked project or pub count in dashboard
        if has_topic and has_element:
            clicked = f'for "{formatstr(filter_topic)}" and "{formatstr(filter_element)}"'
        elif has_topic:
            clicked = f'for "{formatstr(filter_topic)}"'
        elif has_element:
            clicked = f'for "{formatstr(filter_element)}"'
        else:
            clicked = f"for all {index}"

        if has_topic:
            s = run_named_query(filter_topic)
        elif has_element:
            s = run_named_query(filter_element)
        else:
            # note: sorting does not apply to match all
            s = query.run_query(Q({"match_all": {}}), index=index, filters=filters)

    elif search_type == 'click_bar':
        # user clicked on bar chart
        if search_query == filter_topic:
            # the clicked bar is the topic already filtered in the dashboard
            if has_element:
                clicked = f'for "{formatstr(search_query)}" and "{formatstr(filter_element)}"'
            else:
                clicked = f'for "{formatstr(search_query)}"'
        elif has_topic and has_element:
            clicked = f'for "{formatstr(search_query)}", "{formatstr(filter_topic)}", and "{formatstr(filter_element)}"'
        elif has_topic:
            clicked = f'for "{formatstr(search_query)}", and "{formatstr(filter_topic)}"'
        elif has_element:
            clicked = f'for "{formatstr(search_query)}", and "{formatstr(filter_element)}"'
        else:
            clicked = f'for "{formatstr(search_query)}"'

        s = run_named_query(search_query)

    elif search_type == 'click_map':
        # user clicked state on map
        if has_topic and has_element:
            clicked = f'for "{search_query}", "{formatstr(filter_topic)}", and "{formatstr(filter_element)}"'
        elif has_topic:
            clicked = f'for "{search_query}", and "{formatstr(filter_topic)}"'
        elif has_element:
            clicked = f'for "{search_query}", and "{formatstr(filter_element)}"'
        else:
            clicked = f'for "{search_query}"'

        q = Q({
            "nested": {
                "path": "funding_agencies",
                "query": {
                    "bool": {
                        "must": [
                            {"match": {"funding_agencies.state": search_query}}
                        ]
                    }
                },
            }
        })
        s = query.run_query(q, index=index, filters=filters)

    elif search_type == 'search' and search_query not in (None, 'None'):
        # free search requested by the user; a missing query (None) or the
        # literal string 'None' means there is nothing to search for
        if has_topic and has_element:
            clicked = f'for "{search_query}", "{formatstr(filter_topic)}" and "{formatstr(filter_element)}"'
        elif has_topic:
            clicked = f'for "{search_query}", and "{formatstr(filter_topic)}"'
        elif has_element:
            clicked = f'for "{search_query}", and "{formatstr(filter_element)}"'
        else:
            clicked = f'for "{search_query}"'

        q = Q({
            "multi_match": {
                "query": search_query,
                "fields": ["title", "abstract"],
            }
        })
        s = query.run_query(q, index=index, filters=filters)

    else:
        # Nothing to search for. Previously only requests referred from
        # 'explore' were redirected; every other request fell through with
        # `s` and `clicked` undefined and crashed.
        return redirect(url_for('explore'))

    s = s[:1000]  # pagination
    r = s.execute()

    buttonStates = dict(
        type=search_type,
        topic=filter_topic,
        element=filter_element,
        status=filter_status,
        date_range=date_range,
        sort_by=sort_by,
        doc_type=doc_type,
    )

    # Echoed back to the template so the form keeps the submitted values.
    formdata = dict(
        type=search_type,
        query=search_query,
        index=index,
        topic=filter_topic,
        element=filter_element,
        status=filter_status,
        date_range=date_range,
        sort_by=sort_by,
    )

    last_update = client.get(index='appdata', doc_type='doc', id=1)['_source']['last_update']

    return render_template(
        'results.html',
        title='Results',
        heading='Search Results',
        content=r,
        clicked=clicked,
        buttonStates=buttonStates,
        formdata=formdata,
        last_update=last_update,
    )