def user_event_query_builder(user_id, event_id):
    """Look up a single event in the search index, sorted by distance.

    The query is a ``filtered`` query whose only filter is an ``ids`` clause
    for ``event_id`` (document type ``modelresult``).  Hits are sorted
    ascending by ``_geo_distance`` from the point returned by
    ``get_user_location(user_id)``.

    The sort unit is the first two characters of ``distance_unit`` on the
    user's first ``FilterState`` row, or ``"mi"`` when the user has no row.

    Returns whatever ``Elasticsearch.search`` returns for the request
    (``size=50``).
    """
    es = Elasticsearch(settings.ELASTICSEARCH_URL)
    index_name = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
    origin = get_user_location(user_id)

    filter_states = FilterState.objects.filter(user_id=user_id)
    if filter_states:
        unit = filter_states[0].distance_unit[:2]
    else:
        unit = "mi"

    only_this_event = {
        "ids": {
            "type": "modelresult",
            "values": [event_id],
        }
    }
    nearest_first = {
        "_geo_distance": {
            "location": {"lat": origin.y, "lon": origin.x},
            "order": "asc",
            "unit": unit,
        }
    }
    search_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [only_this_event],
                    }
                }
            }
        },
        "sort": [nearest_first],
    }
    return es.search(index=index_name, body=search_body, size=50)
def filter(current_user, user_ids):
    """Score a fixed set of users against ``current_user`` and return hits.

    ``user_ids`` are turned into index document ids of the form
    ``members.facebookcustomuseractive.<id>`` and used as an ``ids`` filter.
    Inside that set, documents are matched with a ``bool``/``should`` of:

    * a ``multi_match`` over ``goals``, ``offers`` and ``interests`` using
      the text from ``ElasticSearchMatchEngineManager.prepare_query``, and
    * a ``terms`` clause on ``likes_fb_ids`` with the ``facebook_id`` values
      of ``current_user``'s ``FacebookLike`` rows.

    Results are sorted ascending by ``_geo_distance`` from the user's
    location; the unit comes from the first ``FilterState`` row (first two
    characters of ``distance_unit``) or defaults to ``"mi"``.

    Returns ``response['hits']['hits']`` for a ``size=100`` search.
    """
    doc_ids = [
        'members.facebookcustomuseractive.%s' % user_id
        for user_id in user_ids
    ]

    like_rows = FacebookLike.objects.filter(user_id=current_user.id)
    liked_fb_ids = list(like_rows.values_list('facebook_id', flat=True))

    stop_words = StopWords.objects.all().values_list('word', flat=True)
    match_text = ElasticSearchMatchEngineManager.prepare_query(
        current_user, stop_words)
    match_fields = ["goals", "offers", "interests"]

    origin = get_user_location(current_user.id)
    es = Elasticsearch(settings.ELASTICSEARCH_URL)
    index_name = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']

    filter_states = FilterState.objects.filter(user_id=current_user.id)
    if filter_states:
        unit = filter_states[0].distance_unit[:2]
    else:
        unit = "mi"

    text_clause = {
        "multi_match": {
            "fields": match_fields,
            "query": match_text,
        }
    }
    likes_clause = {"terms": {"likes_fb_ids": liked_fb_ids or []}}
    scoring_query = {"bool": {"should": [text_clause, likes_clause]}}

    only_these_users = {
        "ids": {
            "type": "modelresult",
            "values": doc_ids,
        }
    }
    nearest_first = {
        "_geo_distance": {
            "location": {"lat": origin.y, "lon": origin.x},
            "order": "asc",
            "unit": unit,
        }
    }
    search_body = {
        "highlight": {
            "fields": {"goals": {}, "interests": {}, "offers": {}}
        },
        "query": {
            "filtered": {
                "query": scoring_query,
                "filter": {"bool": {"must": [only_these_users]}},
            }
        },
        "sort": [nearest_first],
    }

    result = es.search(index=index_name, body=search_body, size=100)
    return result['hits']['hits']
def event_query_builder(user, event_ids, query=None, is_filter=False,
                        stop_words=()):
    """Search the index for ``event_ids``, nearest to ``user`` first.

    Parameters:
        user: object with an ``id``; also used to look up ``FilterState``.
        event_ids: document ids placed in the ``ids`` filter.
        query: optional text; only used when ``is_filter`` is true and the
            user's filter state has ``order_criteria == 'event_score'``, in
            which case a ``multi_match`` over ``name`` and ``description``
            is added to the filtered query.
        is_filter: when true, the user's saved ``FilterState`` contributes
            keyword ``terms`` clauses (``should``) and a ``geo_distance``
            clause (``must``).
        stop_words: iterable of words; keywords whose Porter stem equals the
            stem of a stop word are dropped.

    Returns:
        The raw result of ``Elasticsearch.search`` (``size=50``).

    A user without any ``FilterState`` row is handled: the sort unit falls
    back to ``"mi"`` and, with ``is_filter=True``, no keyword, distance or
    ``multi_match`` clause is added (previously this case raised
    ``IndexError`` on ``fs[0]``).
    """
    client = Elasticsearch(settings.ELASTICSEARCH_URL)
    index = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
    location = get_user_location(user.id)

    fs = FilterState.objects.filter(user=user)
    # ``state`` is None when the user has never saved a filter.
    state = fs[0] if fs else None
    distance_unit = state.distance_unit[:2] if state is not None else "mi"

    ids_predicate = {
        "ids": {
            "type": "modelresult",
            "values": event_ids,
        }
    }
    sort = [{
        "_geo_distance": {
            "location": {"lat": location.y, "lon": location.x},
            "order": "asc",
            "unit": distance_unit,
        }
    }]

    if is_filter:
        keyword_predicate = []
        # Stays an empty dict inside ``must`` when no distance is configured,
        # exactly as the request was built before.
        distance_predicate = {}

        if state is not None and state.keyword:
            porter_stemmer = PorterStemmer()
            # A set makes the per-word stop-word check O(1).
            s_stop_words = set(porter_stemmer.stem(w) for w in stop_words)
            s_words = []
            for word in state.keyword.split(','):
                for sub_word in word.split():
                    s_word = porter_stemmer.stem(sub_word.lower())
                    if s_word not in s_stop_words:
                        s_words.append(s_word)
            if s_words:
                keyword_predicate.append({"terms": {"name": s_words}})
                keyword_predicate.append(
                    {"terms": {"description": s_words}})

        if state is not None and state.distance:
            # Reuses the location fetched above instead of looking it up
            # a second time.
            distance_predicate = {
                "geo_distance": {
                    "distance": "{0}{1}".format(state.distance,
                                                state.distance_unit),
                    "location": {"lat": location.y, "lon": location.x},
                }
            }

        body = {
            "highlight": {
                "fields": {"name": {}, "description": {}}
            },
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [ids_predicate, distance_predicate],
                            "should": keyword_predicate,
                            "must_not": [],
                        }
                    }
                }
            },
            "sort": sort,
        }

        if (query is not None and state is not None
                and state.order_criteria == 'event_score'):
            body["query"]["filtered"]["query"] = {
                "multi_match": {
                    "fields": ["name", "description"],
                    "query": query,
                }
            }
    else:
        body = {
            "highlight": {
                "fields": {
                    "goals": {},
                    "interests": {},
                    "likes": {},
                    "offers": {},
                }
            },
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [ids_predicate],
                            "should": [],
                            "must_not": [],
                        }
                    }
                }
            },
            "sort": sort,
        }

    response = client.search(index=index, body=body, size=50)
    return response
def query_builder(user, query, fields, exclude_user_ids, stop_words,
                  is_filter=False, friends_list=(), friends=False,
                  likes=None):
    """Run the user-matching search and return the raw response.

    Parameters:
        user: object with an ``id``; also used to look up ``FilterState``.
        query: text for the ``multi_match`` clause.
        fields: index fields the ``multi_match`` clause searches.
        exclude_user_ids: document ids placed in a ``must_not`` ``ids``
            filter.
        stop_words: iterable of words; filter keywords whose Porter stem
            equals the stem of a stop word are dropped.
        is_filter: when true, the user's saved ``FilterState`` contributes
            age, gender, keyword and distance clauses.
        friends_list: document ids for a ``must`` ``ids`` filter; the filter
            is added when ``friends_list`` is non-empty or ``friends`` is
            true.
        friends: see ``friends_list``.
        likes: values for the ``terms`` clause on ``likes_fb_ids``; a falsy
            value is sent as an empty list.

    Returns:
        The raw result of ``Elasticsearch.search`` (``size=100``), sorted
        ascending by ``_geo_distance`` from the user's location.
    """
    client = Elasticsearch(settings.ELASTICSEARCH_URL)
    index = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
    likes_ids = likes if likes else []
    location = get_user_location(user.id)

    fs = FilterState.objects.filter(user=user)
    # ``state`` is None when the user has never saved a filter.
    state = fs[0] if fs else None
    distance_unit = state.distance_unit[:2] if state is not None else "mi"

    friends_predicate = {}
    if friends_list or friends:
        friends_predicate = {
            "ids": {
                "type": "modelresult",
                "values": friends_list,
            }
        }

    multi_match_query = {
        "bool": {
            "should": [
                {"multi_match": {"fields": fields, "query": query}},
                {"terms": {"likes_fb_ids": likes_ids}},
            ]
        }
    }

    # Defaults describe the unfiltered request; the filter branch below
    # replaces both.
    should = []
    must = [friends_predicate]

    if is_filter:
        # Without a saved filter state these stay empty dicts, which is what
        # the request carried before as well.
        should_predicate = {}
        age_predicate = {}
        distance_predicate = {}

        if state is not None:
            age_predicate = {
                "range": {
                    "age": {"gte": state.min_age, "lte": state.max_age}
                }
            }

            if state.gender in ('m,f', 'f,m'):
                should_predicate = []
            else:
                should_predicate = [{"term": {"gender": state.gender}}]

            if state.keyword:
                porter_stemmer = PorterStemmer()
                # Stemmed only when keywords exist; a set makes the
                # per-word stop-word check O(1).
                s_stop_words = set(
                    porter_stemmer.stem(w) for w in stop_words)
                s_words = []
                for word in state.keyword.split(','):
                    for sub_word in word.split():
                        s_word = porter_stemmer.stem(sub_word.lower())
                        if s_word not in s_stop_words:
                            s_words.append(s_word)
                if s_words:
                    # Keyword clauses share the ``should`` list with the
                    # gender clause.
                    for field in ("goals", "offers", "interests"):
                        should_predicate.append(
                            {"terms": {field: s_words}})

            if state.distance:
                # Reuses the location fetched above instead of looking it
                # up a second time.
                distance_predicate = {
                    "geo_distance": {
                        "distance": "{0}{1}".format(state.distance,
                                                    state.distance_unit),
                        "location": {
                            "lat": location.y,
                            "lon": location.x,
                        },
                    }
                }

        should = should_predicate
        must = [age_predicate, distance_predicate, friends_predicate]

    body = {
        "highlight": {
            "fields": {"goals": {}, "interests": {}, "offers": {}}
        },
        "query": {
            "filtered": {
                "query": multi_match_query,
                "filter": {
                    "bool": {
                        "must_not": [{
                            "ids": {
                                "type": "modelresult",
                                "values": exclude_user_ids,
                            }
                        }],
                        "should": should,
                        "must": must,
                    }
                }
            }
        },
        "sort": [{
            "_geo_distance": {
                "location": {"lat": location.y, "lon": location.x},
                "order": "asc",
                "unit": distance_unit,
            }
        }],
    }

    response = client.search(index=index, body=body, size=100)
    return response
def match_between(user_id1, user_id2):
    """Return the search hits for ``user_id2`` as matched against ``user_id1``.

    The query text is built from the first user's goals, offers, interests
    and ``FacebookLikeProxy`` rows: each is converted with ``unicode``,
    lower-cased, passed through ``remove_punctuation_map`` and split into
    words; words whose Porter stem equals the stem of a ``StopWords`` entry
    are dropped.

    A ``multi_match`` search over ``goals``, ``offers``, ``interests`` and
    ``likes`` is run, restricted to the second user's document and excluding
    the first user's document, with highlighting on those fields.  If it
    yields no hits, a second search that only filters on the second user's
    document is run and its hits are returned instead.  Both searches sort
    ascending by ``_geo_distance`` (unit ``"mi"``) from the first user's
    location.

    Raises whatever ``FacebookCustomUserActive.objects.get`` raises when
    ``user_id1`` does not resolve to a user.
    """
    from nltk.stem.porter import PorterStemmer
    porter_stemmer = PorterStemmer()

    user = FacebookCustomUserActive.objects.get(pk=user_id1)
    goals = user.goal_set.all()
    offers = user.offer_set.all()
    interests = user.interest_set.all()
    likes = FacebookLikeProxy.objects.filter(user_id=user.id)

    words = set()
    for subject in itertools.chain(goals, offers, interests, likes):
        text = unicode(subject).lower().translate(remove_punctuation_map)
        words.update(text.split())

    stop_words = StopWords.objects.all().values_list('word', flat=True)
    # A set makes the per-word stop-word check O(1).
    st_stop_words = set(porter_stemmer.stem(w) for w in stop_words)
    removed_stopwords = [
        word for word in words
        if porter_stemmer.stem(word) not in st_stop_words
    ]
    query = ' '.join(removed_stopwords)

    fields = ["goals", "offers", "interests", "likes"]
    exclude_user_ids = ['members.facebookcustomuseractive.%s' % user_id1]
    target_doc_id = 'members.facebookcustomuseractive.%s' % user_id2

    location = get_user_location(user.id)
    sorting = {
        "_geo_distance": {
            "location": {"lat": location.y, "lon": location.x},
            "order": "asc",
            "unit": "mi",
        }
    }

    client = Elasticsearch(settings.ELASTICSEARCH_URL)
    index = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']

    s = Search(using=client, index=index) \
        .query(Q("multi_match", query=query, fields=fields)) \
        .filter(F("ids", type="modelresult", values=[target_doc_id])) \
        .filter(~F("ids", type="modelresult", values=exclude_user_ids)) \
        .highlight(*fields) \
        .sort(sorting)
    hits = s.execute().hits.hits
    if hits:
        return hits

    # Only reached when the scored search found nothing, so the extra
    # round-trip is skipped whenever the first search already has hits.
    s1 = Search(using=client, index=index) \
        .filter(F("ids", type="modelresult", values=[target_doc_id])) \
        .sort(sorting)
    return s1.execute().hits.hits
def prepare_location(self, obj):
    """Return the location of ``obj`` as a ``lat``/``lon`` mapping.

    The point is obtained from ``get_user_location(obj.id)``; its ``y``
    attribute is reported as ``lat`` and its ``x`` attribute as ``lon``.
    """
    point = get_user_location(obj.id)
    latitude = point.y
    longitude = point.x
    return {"lat": latitude, "lon": longitude}