def index(request):
    """Render the query tagger page and, on a valid POST, run the tagging.

    A non-POST request shows an unbound form pre-filled with default target
    field and value. An invalid POST re-renders the bound form. A valid POST
    passes the query, target field and target value to ``update_by_query``
    and hands its return value to the template as ``found``.
    """
    template = 'querytagger/querytagger_index.html'

    # Anything but a submitted form: show the empty form with defaults.
    if request.method != 'POST':
        unbound_form = ListForm(initial={
            'target_field': 'tag_ss',
            'target_value': 'myTag',
        })
        return render(request, template, {'form': unbound_form})

    # The form has been submitted: bind it to the POST data.
    bound_form = ListForm(request.POST)
    if not bound_form.is_valid():
        return render(request, template, {'form': bound_form})

    cleaned = bound_form.cleaned_data
    query = cleaned['query']
    target_field = cleaned['target_field']
    target_value = cleaned['target_value']

    found = export_solr().update_by_query(
        query=query, field=target_field, value=target_value)

    return render(request, template, {
        "form": bound_form,
        "found": found,
        "query": query,
        "results": True,
    })
def index(request):
    """Show the query tagger form and apply the tagging for a valid POST.

    The template always receives ``form``. After a valid submission it also
    receives ``found`` (the return value of ``update_by_query``), the
    submitted ``query`` and ``results`` set to ``True``.
    """
    submitted = request.method == 'POST'

    if submitted:
        # A form bound to the POST data
        form = ListForm(request.POST)
    else:
        # An unbound form with default target field and value
        form = ListForm(initial={'target_field': 'tag_ss', 'target_value': 'myTag'})

    context = {'form': form}

    if submitted and form.is_valid():
        data = form.cleaned_data
        query = data['query']
        field = data['target_field']
        value = data['target_value']

        solr_connector = export_solr()
        hits = solr_connector.update_by_query(query=query, field=field, value=value)

        context["found"] = hits
        context["query"] = query
        context["results"] = True

    return render(request, 'querytagger/querytagger_index.html', context)
def matches(self, text, dict_ids=None):
    """Return the dictionary values matched in ``text``, grouped by dictionary.

    The text is posted to the Solr core as a temporary document whose id is
    derived from the SHA-256 hash of the text. The facets of the requested
    dictionary fields are then read for that document, and finally the
    temporary document is deleted again (also when the query fails).

    Args:
        text: The text to analyze.
        dict_ids: Names of the facet fields (dictionaries) to check. When
            empty or ``None``, ``self.get_dictionaries()`` is used.

    Returns:
        A dict mapping each dictionary id present in the facet response to
        the list of matched values.
    """
    if not dict_ids:
        dict_ids = self.get_dictionaries()

    docid = 'sha256_' + hashlib.sha256(text.encode('utf-8')).hexdigest()

    solr = export_solr.export_solr(solr=self.solr, core=self.solr_core)
    data = {
        'id': docid,
        # content of this field will be analyzed by dictionary matchers but
        # not indexed (setup in preconfigured Solr schema for better
        # performance)
        'do_not_index_txt': text,
    }
    solr.post(data=data, commit=True)

    headers = {'content-type': 'application/json'}
    params = {
        'wt': 'json',
        'rows': 0,  # we do not need document field results, only the facet
        # maximum number of constraint counts returned for the facet fields;
        # a negative value means unlimited
        'facet.limit': -1,
        'facet.mincount': 1,
        'facet': 'on',
        'facet.field': dict_ids,
        'q': 'id:' + docid,
    }

    found = {}
    try:
        r = requests.get(self.solr + self.solr_core + '/select',
                         params=params, headers=headers)
        facet_fields = r.json()['facet_counts']['facet_fields']

        for dict_id in dict_ids:
            if dict_id in facet_fields:
                # The facet list alternates value, count, value, count, ...
                # so every second entry starting at index 0 is a value.
                found[dict_id] = facet_fields[dict_id][::2]
    finally:
        # delete analyzed and indexed text from dictionary index, even if
        # the query or the response parsing raised
        pysolr.Solr(self.solr + self.solr_core).delete(id=docid)

    return found
def tag_concept(concept):
    """Tag index entries matching a concept, its aliases and hidden labels.

    The tag data is built from the concept's own facet and label, its
    additional ``ConceptTag`` entries and the tags of its groups. One
    ``update_by_query`` call is then made for the concept itself, one per
    alternate label and one per hidden label.

    Args:
        concept: The concept whose labels are searched and whose tags are
            written to the matching entries.

    Returns:
        A tuple ``(count_queries, count_tagged, log)``: the number of update
        queries run, the sum of the counts returned by those queries, and a
        list of log message strings.
    """
    # Todo: For more performance do only one query/search for all labels and aliases of the concept
    verbose = False
    default_facet = "tag_ss"

    count_queries = 0
    count_tagged = 0

    connector = export_solr(verbose=verbose)
    log = []

    if verbose:
        log.append("Checking concept: {}".format(concept.prefLabel))

    # if no entity facet use default facet
    if concept.facet:
        facet = concept.facet.facet
    else:
        facet = default_facet

    # add entity title to facet
    value = concept.prefLabel
    # if no prefLabel, use the query as value for tagging
    if not value:
        value = concept.query

    tagdata = add_value_to_facet(facet=facet, value=value)

    # Tag with all additional tags
    for concepttag in ConceptTag.objects.filter(concept=concept.id):
        # if no concepttag facet use default facet
        if concepttag.facet:
            facet = concepttag.facet.facet
        else:
            facet = default_facet

        # add concepttag title as values of this facet
        tagdata = add_value_to_facet(
            facet=facet, value=concepttag.label, data=tagdata)

    # Tag with all groups
    for group in concept.groups.all():
        tagdata = get_grouptags(
            group, default_facet=default_facet, data=tagdata)

    # build query searching for concept but only if not tagged yet
    # (concept prefLabel not in target facet)
    searchquery, searchquery_parameters = build_searchquery(
        label=concept.prefLabel,
        query=concept.query,
        querytype=concept.query_type)

    if verbose:
        print("Search query:")
        print(searchquery)
        print("Search query parameters:")
        print(searchquery_parameters)

    count_queries += 1
    count = connector.update_by_query(
        query=searchquery,
        queryparameters=searchquery_parameters,
        data=tagdata)

    if count:
        count_tagged += count
        log.append(
            'Tagged {} yet untagged entries with tags of the concept "{}"'
            .format(count, concept.prefLabel))

    # Search aliases and tag them, too
    for alternate in Alternate.objects.filter(concept=concept.id):
        if verbose:
            log.append("Checking alias: {}".format(alternate.altLabel))

        searchquery, searchquery_parameters = build_searchquery(
            label=alternate.altLabel,
            query=alternate.query,
            querytype=alternate.query_type)

        count_queries += 1
        count = connector.update_by_query(
            query=searchquery,
            queryparameters=searchquery_parameters,
            data=tagdata)

        if count:
            count_tagged += count
            log.append(
                'Tagged {} yet untagged entries containing alias "{}" with tags of the concept "{}"'
                .format(count, alternate.altLabel, concept.prefLabel))

    # Search hidden labels and tag them, too
    for hidden in Hidden.objects.filter(concept=concept.id):
        if verbose:
            log.append("Checking hidden label: {}".format(hidden.hiddenLabel))

        searchquery, searchquery_parameters = build_searchquery(
            label=hidden.hiddenLabel,
            query=hidden.query,
            querytype=hidden.query_type)

        count_queries += 1
        count = connector.update_by_query(
            query=searchquery,
            queryparameters=searchquery_parameters,
            data=tagdata)

        if count:
            count_tagged += count
            # Report the hidden label that matched; the previous code
            # referenced an undefined name here and raised NameError.
            log.append(
                'Tagged {} yet untagged entries containing hidden label "{}" with tags of the concept "{}"'
                .format(count, hidden.hiddenLabel, concept.prefLabel))

    return count_queries, count_tagged, log