예제 #1
0
def index(request):
    """Render the query tagger page and run the tagging on a valid POST.

    A non-POST request renders an unbound form prefilled with a default
    target field and value. An invalid POST re-renders the bound form.
    A valid POST hands the query, field and value from the form to
    ``export_solr().update_by_query`` and renders its return value as
    ``found`` together with the query.
    """
    template_name = 'querytagger/querytagger_index.html'

    # Not a submission: show an unbound form with default values.
    if request.method != 'POST':
        defaults = {
            'target_field': 'tag_ss',
            'target_value': 'myTag',
        }
        return render(request, template_name, {'form': ListForm(initial=defaults)})

    # A form bound to the POST data.
    form = ListForm(request.POST)
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    query = form.cleaned_data['query']
    target_field = form.cleaned_data['target_field']
    target_value = form.cleaned_data['target_value']

    found = export_solr().update_by_query(
        query=query,
        field=target_field,
        value=target_value,
    )

    context = {
        "form": form,
        "found": found,
        "query": query,
        "results": True,
    }
    return render(request, template_name, context)
def index(request):
    """Handle the query tagger page.

    On POST the submitted form is validated; if it is valid, the query,
    target field and target value are passed to
    ``export_solr().update_by_query`` and the returned value is added to
    the template context as ``found``. On any other method an unbound form
    with default target field and value is shown.
    """
    template_name = 'querytagger/querytagger_index.html'
    extra_context = {}

    if request.method == 'POST':
        # A form bound to the POST data.
        form = ListForm(request.POST)

        if form.is_valid():
            cleaned = form.cleaned_data
            query = cleaned['query']
            target_field = cleaned['target_field']
            target_value = cleaned['target_value']

            connector = export_solr()
            extra_context = {
                "found": connector.update_by_query(
                    query=query, field=target_field, value=target_value),
                "query": query,
                "results": True,
            }
    else:
        # An unbound form.
        form = ListForm(initial={'target_field': 'tag_ss',
                                 'target_value': 'myTag'})

    # 'form' always comes first, followed by the result keys (if any).
    context = {'form': form}
    context.update(extra_context)
    return render(request, template_name, context)
예제 #3
0
    def matches(self, text, dict_ids=None):
        """Return the dictionary entries found in *text*, keyed by dictionary.

        The text is posted to the Solr core as a temporary document whose id
        is derived from the SHA-256 digest of the text. The dictionary fields
        are then requested as facets restricted to that document, and the
        temporary document is deleted again, even if the facet request or
        the parsing of its response fails.

        Args:
            text: The text to analyze.
            dict_ids: Facet field names of the dictionaries to check. When
                empty or ``None``, ``self.get_dictionaries()`` is used.

        Returns:
            A dict mapping every dictionary id that is present in the facet
            response to the list of facet values returned for it.
        """
        if not dict_ids:
            dict_ids = self.get_dictionaries()

        digest = hashlib.sha256(text.encode('utf-8')).hexdigest()
        docid = 'sha256_' + digest

        solr = export_solr.export_solr(solr=self.solr, core=self.solr_core)

        data = {
            'id': docid,
            # content of this field will be analyzed by dictionary matchers
            # but not indexed (setup in preconfigured Solr schema for better
            # performance)
            'do_not_index_txt': text,
        }
        solr.post(data=data, commit=True)

        matches = {}

        try:
            headers = {'content-type': 'application/json'}

            params = {
                'wt': 'json',
                # we do not need document field results, only the facet
                'rows': 0,
                # Maximum number of constraint counts returned for the facet
                # fields. A negative value means unlimited.
                'facet.limit': -1,
                'facet.mincount': 1,
                'facet': 'on',
                'facet.field': dict_ids,
                'q': 'id:' + docid,
            }

            r = requests.get(self.solr + self.solr_core + '/select',
                             params=params,
                             headers=headers)
            result = r.json()

            for dict_id in dict_ids:
                facet_fields = result['facet_counts']['facet_fields']
                if dict_id in facet_fields:
                    # Each facet is a flat list alternating value, count;
                    # keep only the values (even positions).
                    matches[dict_id] = facet_fields[dict_id][::2]
        finally:
            # delete analyzed and indexed text from dictionary index, also
            # when the facet request above raised
            pysolr.Solr(self.solr + self.solr_core).delete(id=docid)

        return matches
예제 #4
0
def tag_concept(concept):
    """Tag entries matching a concept, its alternate labels and hidden labels.

    The tag data is built from the concept's preferred label (or its query
    if there is no preferred label), all ``ConceptTag`` entries of the
    concept and the tags of all its groups. One update-by-query is then run
    for the concept itself and one for each ``Alternate`` and ``Hidden``
    label of the concept.

    Args:
        concept: The concept to tag. Its ``id``, ``prefLabel``, ``query``,
            ``query_type``, ``facet`` and ``groups`` attributes are read.

    Returns:
        A tuple ``(count_queries, count_tagged, log)``: the number of update
        queries issued, the sum of the counts returned by those queries, and
        a list of log message strings.
    """
    # TODO: For more performance do only one query/search for all labels and
    # aliases of the concept

    verbose = False

    default_facet = "tag_ss"

    count_queries = 0
    count_tagged = 0

    connector = export_solr(verbose=verbose)

    log = []

    if verbose:
        log.append("Checking concept: {}".format(concept.prefLabel))

    # if no entity facet use default facet
    facet = concept.facet.facet if concept.facet else default_facet

    # add entity title to facet; if no prefLabel, use the query as value for
    # tagging
    value = concept.prefLabel
    if not value:
        value = concept.query

    tagdata = add_value_to_facet(facet=facet, value=value)

    # Tag with all additional tags
    for concepttag in ConceptTag.objects.filter(concept=concept.id):

        # if no concepttag facet use default facet
        if concepttag.facet:
            facet = concepttag.facet.facet
        else:
            facet = default_facet

        # add concepttag title as values of this facet
        tagdata = add_value_to_facet(facet=facet,
                                     value=concepttag.label,
                                     data=tagdata)

    # Tag with all groups
    for group in concept.groups.all():
        tagdata = get_grouptags(group,
                                default_facet=default_facet,
                                data=tagdata)

    # build query searching for concept but only if not tagged yet (concept
    # prefLabel not in target facet)
    searchquery, searchquery_parameters = build_searchquery(
        label=concept.prefLabel,
        query=concept.query,
        querytype=concept.query_type)

    if verbose:
        print("Search query:")
        print(searchquery)
        print("Search query parameters:")
        print(searchquery_parameters)

    count_queries += 1

    count = connector.update_by_query(query=searchquery,
                                      queryparameters=searchquery_parameters,
                                      data=tagdata)

    if count:
        count_tagged += count
        log.append(
            "Tagged {} yet untagged entries with tags of the concept \"{}\"".
            format(count, concept.prefLabel))

    # Search aliases and tag them, too
    for alternate in Alternate.objects.filter(concept=concept.id):

        if verbose:
            log.append("Checking alias: {}".format(alternate.altLabel))

        searchquery, searchquery_parameters = build_searchquery(
            label=alternate.altLabel,
            query=alternate.query,
            querytype=alternate.query_type)

        count_queries += 1
        count = connector.update_by_query(
            query=searchquery,
            queryparameters=searchquery_parameters,
            data=tagdata)

        if count:
            count_tagged += count

            log.append(
                "Tagged {} yet untagged entries containing alias \"{}\" with tags of the concept \"{}\""
                .format(count, alternate.altLabel, concept.prefLabel))

    # Search hidden labels and tag them, too
    for hidden in Hidden.objects.filter(concept=concept.id):

        if verbose:
            log.append("Checking hidden label: {}".format(hidden.hiddenLabel))

        searchquery, searchquery_parameters = build_searchquery(
            label=hidden.hiddenLabel,
            query=hidden.query,
            querytype=hidden.query_type)

        count_queries += 1
        count = connector.update_by_query(
            query=searchquery,
            queryparameters=searchquery_parameters,
            data=tagdata)

        if count:
            count_tagged += count

            # Report the hidden label that matched. The original referenced
            # an undefined name ``alias`` here and raised NameError.
            log.append(
                "Tagged {} yet untagged entries containing hidden label \"{}\" with tags of the concept \"{}\""
                .format(count, hidden.hiddenLabel, concept.prefLabel))

    return count_queries, count_tagged, log