Exemple #1
0
def text_only_search(text):
    """
    Search keywords for each word in ``text`` and return the ranked results.

    ``text`` is split on whitespace. Repeated words and words found in
    ``STOP_WORDS`` are skipped. Every remaining word is handed to
    ``EditDistance.correct`` and each candidate it yields is looked up as an
    exact ``Keywords.keyword`` match. All matching rows are collected and
    passed to ``rank_results``; its return value is returned unchanged.

    :param text: the search string (must provide ``split()``).
    :returns: whatever ``rank_results`` returns for the collected rows.
    """
    # Remove duplicates but keep first-seen order, so the rows reach
    # rank_results in an order that depends only on the input text and not
    # on set iteration order.
    seen = set()
    search_words = []
    for word in text.split():
        if word not in seen:
            seen.add(word)
            search_words.append(word)

    results = []
    ed = EditDistance(Keywords, 'keyword')

    for word in search_words:
        if word in STOP_WORDS:
            continue

        # correct() returns a pair: an iterable of candidates and one value
        # that is attached to every row found for those candidates.
        # NOTE(review): presumably spelling candidates and their edit
        # distance from `word` -- confirm against EditDistance.
        ed_list, ed_diff = ed.correct(word)
        for res in ed_list:
            # Expose ed_diff on each row as `diff`, keep only exact keyword
            # matches, and order by diff, then by descending frequency.
            cur_res = (Keywords.objects
                       .extra(select={'diff': "%s"}, select_params=[ed_diff])
                       .filter(keyword__exact=res)
                       .order_by('diff', '-frequency'))
            # Extend in place instead of rebuilding the whole list on every
            # iteration.
            results.extend(cur_res)

    # Ranking of the collected rows is delegated to rank_results.
    return rank_results(results)
Exemple #2
0
def text_only_search(text):
    """
    Search keywords for each word in ``text`` and return the ranked results.

    ``text`` is split on whitespace. Repeated words and words found in
    ``STOP_WORDS`` are skipped. Every remaining word is handed to
    ``EditDistance.correct`` and each candidate it yields is looked up as an
    exact ``Keywords.keyword`` match. All matching rows are collected and
    passed to ``rank_results``; its return value is returned unchanged.

    The function writes nothing to stdout; the earlier debugging prints were
    removed.

    :param text: the search string (must provide ``split()``).
    :returns: whatever ``rank_results`` returns for the collected rows.
    """
    # Remove duplicates but keep first-seen order, so the rows reach
    # rank_results in an order that depends only on the input text and not
    # on set iteration order.
    seen = set()
    search_words = []
    for word in text.split():
        if word not in seen:
            seen.add(word)
            search_words.append(word)

    results = []
    ed = EditDistance(Keywords, 'keyword')

    for word in search_words:
        if word in STOP_WORDS:
            continue

        # TODO: substring matching (keyword__contains) ranked by the length
        # difference between keyword and search word was sketched here
        # earlier as a later "advanced search" feature; not implemented.

        # correct() returns a pair: an iterable of candidates and one value
        # that is attached to every row found for those candidates.
        # NOTE(review): presumably spelling candidates and their edit
        # distance from `word` -- confirm against EditDistance.
        ed_list, ed_diff = ed.correct(word)
        for res in ed_list:
            # Expose ed_diff on each row as `diff`, keep only exact keyword
            # matches, and order by diff, then by descending frequency.
            cur_res = (Keywords.objects
                       .extra(select={'diff': "%s"}, select_params=[ed_diff])
                       .filter(keyword__exact=res)
                       .order_by('diff', '-frequency'))
            # Extend in place instead of rebuilding the whole list on every
            # iteration. The queryset is evaluated exactly once here.
            results.extend(cur_res)

    # Ranking of the collected rows is delegated to rank_results.
    return rank_results(results)