Example #1
0
def whoosh_search(user_id, query_terms):
    """Search the Whoosh index for titles containing every query term.

    Documents must match all whitespace-separated terms of ``query_terms``
    on the ``title`` field. Documents whose ``artist_id`` appears in the
    user's artist profile are additionally boosted by the matching score.

    Args:
        user_id: Identifier passed to ``get_user_artists_profile``.
        query_terms: Query string; split on whitespace into title terms.

    Returns:
        A dict with ``'items'`` (the stored fields of up to 10 hits) and
        ``'runtime'`` (the ``runtime`` attribute of the search results).
    """
    user_artists_profile = get_user_artists_profile(user_id)
    # split() without an argument discards the empty tokens that
    # split(' ') yields for leading/trailing/repeated spaces; each of those
    # would otherwise become a required Term('title', '') and the whole
    # conjunction would match nothing.
    title_terms = query_terms.split()
    # AndMaybe: the first query is required, the second only adds score.
    # see https://pythonhosted.org/Whoosh/api/query.html#whoosh.query.AndMaybe
    # (An earlier query-parser based draft boosted by 1 + artist_score.)
    q = AndMaybe(
        And([Term('title', qt) for qt in title_terms]),
        Or([Term('artist_id', artist_id, boost=artist_score) for (
            artist_id, artist_score) in user_artists_profile]))
    with ix.searcher() as searcher:
        results = searcher.search(q, limit=10)
        # Hits are read while the searcher is still open.
        return {
            'items': [hit.fields() for hit in results],
            'runtime': results.runtime}
Example #2
0
def whoosh_search(user_id, query_terms):
    """Run a personalised title search against the Whoosh index.

    Every whitespace-separated token of ``query_terms`` must match the
    ``title`` field. Hits whose ``artist_id`` is listed in the profile
    returned by ``get_user_artists_profile(user_id)`` receive an extra
    score contribution weighted by the associated artist score.

    Args:
        user_id: Identifier used to look up the user's artist profile.
        query_terms: Free-text query; tokens are separated by whitespace.

    Returns:
        ``{'items': [...], 'runtime': ...}`` where ``items`` holds the
        stored fields of at most 10 hits and ``runtime`` is taken from the
        Whoosh results object.
    """
    user_artists_profile = get_user_artists_profile(user_id)

    # Required part: all title tokens. Using split() (not split(' ')) so
    # that stray or doubled spaces do not create empty-string terms, which
    # can never match and would empty the result set.
    title_query = And([Term('title', token) for token in query_terms.split()])

    # Optional part: one boosted term per artist in the user's profile.
    artist_query = Or([
        Term('artist_id', artist_id, boost=artist_score)
        for (artist_id, artist_score) in user_artists_profile
    ])

    # see https://pythonhosted.org/Whoosh/api/query.html#whoosh.query.AndMaybe
    # (A previous query-parser variant of this used 1 + artist_score as boost.)
    q = AndMaybe(title_query, artist_query)

    with ix.searcher() as searcher:
        results = searcher.search(q, limit=10)
        # Materialise the hits before the searcher is closed.
        return {
            'items': [hit.fields() for hit in results],
            'runtime': results.runtime
        }
Example #3
0
def elasticsearch_search(user_id, query_terms, ix_name="songs"):
    """Query Elasticsearch for titles matching all terms, boosted per user.

    Builds a ``bool`` query with one mandatory ``match`` clause on ``title``
    (operator ``and``) and one optional ``match`` clause on ``artist_id``
    for each ``(artist_id, artist_score)`` pair returned by
    ``get_user_artists_profile(user_id)``.

    Args:
        user_id: Identifier passed to ``get_user_artists_profile``.
        query_terms: Text to match against the ``title`` field.
        ix_name: Name of the index to search.

    Returns:
        Whatever ``Elasticsearch.search`` returns for the request.
    """
    client = Elasticsearch()

    # https://www.elastic.co/guide/en/elasticsearch/guide/current/bool-query.html
    # Per that guide: the bool query scores a document by summing the _score
    # of every matching must/should clause and dividing by the total number
    # of must and should clauses. must clauses are mandatory; should clauses
    # are optional as long as at least one must clause is present.
    #
    # Idea not implemented: a span_near clause with some slop (e.g. 50) on
    # the title, since term order seems to matter in this context.
    title_clause = {
        "match": {
            "title": {
                "query": query_terms,
                "operator": "and"
            }
        }
    }

    artist_clauses = []
    for artist_id, artist_score in get_user_artists_profile(user_id):
        artist_clauses.append({
            "match": {
                "artist_id": {
                    "query": str(artist_id),
                    "boost": artist_score
                }
            }
        })

    # Alternative not implemented: a "rescore" section (window_size 100)
    # wrapping a function_score query with one term filter per artist and a
    # script_score of "_score + <artist_score>", score_mode "first",
    # boost_mode "replace". It should perform slightly better once ES cache
    # management is right, but is incorrect unless the window is large enough.
    # https://www.elastic.co/guide/en/elasticsearch/reference/2.1/search-request-rescore.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#score-functions
    request_body = {
        "query": {
            "bool": {
                "must": title_clause,
                "should": artist_clauses
            }
        },
    }
    return client.search(index=ix_name, body=request_body)
Example #4
0
def elasticsearch_search(user_id, query_terms, ix_name="songs"):
    """Search an Elasticsearch index by title, favouring the user's artists.

    The request is a ``bool`` query: the ``must`` part is a ``match`` on
    ``title`` requiring all of ``query_terms`` (operator ``and``); the
    ``should`` part holds one boosted ``match`` on ``artist_id`` per entry of
    the user's artist profile.

    Args:
        user_id: Identifier used to fetch the user's artist profile.
        query_terms: Text matched against the ``title`` field.
        ix_name: Index to search.

    Returns:
        The value returned by ``Elasticsearch.search``.
    """

    def artist_boost_clause(artist_id, artist_score):
        # Optional clause: contributes to the score when the artist matches.
        return {"match": {
            "artist_id": {
                "query": str(artist_id),
                "boost": artist_score
            }
        }}

    es = Elasticsearch()
    profile = get_user_artists_profile(user_id)

    # https://www.elastic.co/guide/en/elasticsearch/guide/current/bool-query.html
    # According to that guide, the relevance _score of a bool query is the sum
    # of the _score of all matching must and should clauses divided by the
    # total number of such clauses. Every must clause has to match; should
    # clauses need not match unless there is no must clause at all.
    #
    # Not implemented: span_near on title with slop (e.g. 50) could be
    # interesting, as the order of terms seems important in this context.
    bool_query = {
        "must": {
            "match": {
                "title": {
                    "query": query_terms,
                    "operator": "and"
                }
            }
        },
        "should": [
            artist_boost_clause(artist_id, artist_score)
            for artist_id, artist_score in profile
        ]
    }

    # Not implemented: a "rescore" block (window_size 100) using a
    # function_score rescore_query, with a term filter on artist_id and a
    # script_score "_score + <artist_score>" per profile entry (score_mode
    # "first", boost_mode "replace"). Expected to be slightly faster once ES
    # cache management is right, though incorrect unless the window size is
    # large enough.
    # https://www.elastic.co/guide/en/elasticsearch/reference/2.1/search-request-rescore.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#score-functions
    query = {"query": {"bool": bool_query}}
    return es.search(index=ix_name, body=query)