Ejemplo n.º 1
0
def search(request):
    """Render the search page, ranking matches for a POSTed query by TF-IDF.

    Behaviour by request type:

    * Non-POST request: renders the bare search template.
    * POST with an invalid form: renders the template with the raw POST data
      under ``query_terms``.
    * POST with a valid form: splits the ``q`` field on whitespace,
      lower-cases each term, looks up candidate articles, scores every
      (term, article) pair with ``tfidf`` and renders the template with the
      context keys ``articles``, ``query_terms``, ``total_documents`` and
      ``author_averages`` (a list of ``(author, average_score)`` tuples).

    Args:
        request: The incoming HTTP request.

    Returns:
        The response produced by ``render`` for
        ``pubmed_search/search.html``.
    """
    template = 'pubmed_search/search.html'

    if request.method != 'POST':
        return render(request, template)

    form = SearchForm(request.POST)
    if not form.is_valid():
        # NOTE(review): the raw POST data (not a list of terms) is handed to
        # the template as 'query_terms'; kept as-is, confirm this is intended.
        return render(request, template, {'query_terms': request.POST})

    query_terms = [term.lower() for term in form.cleaned_data['q'].split()]
    intermediate_results = _find_articles(query_terms)

    # Score every (term, document) pair, then order by score, best first.
    terms = Term.objects.filter(term__in=query_terms)
    ordered_results = [
        (tfidf(term, doc), doc)
        for term in terms
        for doc in intermediate_results
    ]
    # Sort on the score only: comparing whole (score, doc) tuples falls back
    # to comparing the doc objects whenever two scores tie, which raises
    # TypeError on Python 3 for objects without an ordering. The sort is
    # stable, so tied entries keep their original relative order.
    ordered_results.sort(key=lambda scored: scored[0], reverse=True)

    # strip out duplicate articles without changing the order
    results = _deduplicate_articles(ordered_results)

    # total number of articles, for the "X of Y documents" display
    total_docs = Article.objects.count()

    # Collect every score per author. ordered_results holds one entry per
    # (term, document) pair, so a document contributes one score per query
    # term to each of its authors. The author instance is stored next to its
    # scores so no additional per-author lookup is needed afterwards.
    scores_by_author = {}
    for score, doc in ordered_results:
        for author in doc.authors.all():
            _, author_scores = scores_by_author.setdefault(
                author.pk, (author, []))
            author_scores.append(score)

    # NOTE(review): the divisor is the total number of (term, document)
    # scores, not the number of scores belonging to the author. This is
    # preserved from the original implementation; confirm it is the intended
    # definition of "average". It cannot be zero inside the loop, because an
    # empty ordered_results yields no authors.
    total_results = len(ordered_results)
    author_averages = [
        (author, fsum(author_scores) / total_results)
        for author, author_scores in scores_by_author.values()
    ]

    return render(request, template, {
        'articles': results,
        'query_terms': query_terms,
        'total_documents': total_docs,
        'author_averages': author_averages,
    })
Ejemplo n.º 2
0
def autosearch(request):
    """Return articles matching the GET query as a JSON array.

    The ``q`` field of the submitted form is split on whitespace and
    lower-cased (the same normalisation ``search`` applies) before being
    passed to ``_find_articles``. Each match is serialised as an object with
    the keys ``pk``, ``title`` and ``url``.

    Args:
        request: The incoming HTTP request; the form is bound to
            ``request.GET``.

    Returns:
        An ``HttpResponse`` with content type ``application/json``. When the
        form does not validate, the body is an empty JSON array, so the view
        always returns a response instead of falling through with ``None``.
    """
    form = SearchForm(request.GET)
    if form.is_valid():
        query_terms = [term.lower() for term in form.cleaned_data['q'].split()]
        matches = [
            {
                "pk": article.pk,
                "title": article.title,
                "url": article.get_absolute_url(),
            }
            for article in _find_articles(query_terms)
        ]
    else:
        # Invalid query: answer with an empty result list.
        matches = []

    return HttpResponse(json.dumps(matches), content_type='application/json')