def setUp(self):
    """Rebuild the related-content index before each test.

    The index is deleted and created again, then the fixture blocks on
    the cluster health call for that index.
    """
    super(TestRelatedContent, self).setUp()

    connection = related.get_connection()
    related.delete(connection)
    related.create(connection)

    # Original note on this call: "wait for all". The call asks for at
    # least "yellow" status and zero relocating shards, with a 5 minute
    # timeout.
    health_options = {
        "index": related.get_index(),
        "wait_for_status": "yellow",
        "wait_for_relocating_shards": 0,
        "timeout": "5m",
    }
    connection.health(**health_options)
def setUp(self):
    """Start every test from a freshly created related-content index."""
    super(TestRelatedContent, self).setUp()

    conn = related.get_connection()
    # Drop whatever index is there and create it again.
    related.delete(conn)
    related.create(conn)

    index_name = related.get_index()
    conn.health(
        index=index_name,
        wait_for_status='yellow',
        # Zero relocating shards, i.e. "wait for all" (original note).
        wait_for_relocating_shards=0,
        timeout='5m',
    )
def related_content(request):
    """Management view for the related-content search index.

    On a valid POST the index is optionally deleted and recreated, a
    re-index is issued via ``related.index`` and the user is redirected
    back to this page. On GET, or on a POST that fails validation, the
    page is rendered with the form, the number of indexed documents and
    the number of scheduled-or-processing events.
    """
    connection = related.get_connection()
    index_name = related.get_index()

    if request.method != 'POST':
        form = forms.ReindexRelatedContentForm(initial={'since': 30})
    else:
        form = forms.ReindexRelatedContentForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            if cleaned['delete_and_recreate']:
                try:
                    related.delete(connection)
                except pyelasticsearch.ElasticHttpNotFoundError:
                    # Nothing to delete; presumably the index does not
                    # exist yet. Carry on and create it.
                    pass
                related.create(connection)
                # A recreated index is empty, so force a full re-index.
                cleaned['all'] = True

            minutes = cleaned['since']
            since = datetime.timedelta(minutes=minutes) if minutes else None
            related.index(all=cleaned['all'], since=since)

            messages.success(request, 'Re-indexing issued.')
            return redirect('manage:related_content')
        # An invalid POST falls through and re-renders the bound form.

    match_everything = {'query': {'match_all': {}}}
    try:
        indexed = connection.count(match_everything, index=index_name)['count']
    except pyelasticsearch.ElasticHttpNotFoundError:
        # The template receives the string 'no' in place of a number.
        indexed = 'no'

    return render(request, 'manage/related_content.html', {
        'form': form,
        'count_indexed': indexed,
        'count_events': Event.objects.scheduled_or_processing().count(),
        'index_name': index_name,
    })
def find_related_events(event, user, boost_title=None, boost_tags=None,
                        size=None, use_title=True, use_tags=True,
                        explain=False):
    """Find events similar to ``event`` using Elasticsearch more_like_this.

    One ``more_like_this`` clause is built per enabled field (title, and
    tags when the event has any) and the clauses are combined in a
    ``bool``/``should`` query. Results are restricted by privacy
    according to ``user``, both in the search and in the database lookup
    that follows it.

    :param event: the event to find related events for; its ``id`` is
        used as the reference document.
    :param user: ``is_active`` and ``is_contributor(user)`` decide which
        privacy levels may be returned.
    :param boost_title: boost of the title clause; defaults to
        ``settings.RELATED_CONTENT_BOOST_TITLE``.
    :param boost_tags: boost of the tags clause; defaults to
        ``settings.RELATED_CONTENT_BOOST_TAGS``.
    :param size: maximum number of hits requested; defaults to
        ``settings.RELATED_CONTENT_SIZE``.
    :param use_title: include the title clause.
    :param use_tags: include the tags clause (only if the event has tags).
    :param explain: ask Elasticsearch for score explanations.
    :returns: ``(events, scores, explanations)`` where ``events`` is a
        list of events in search-hit order, ``scores`` maps hit id to
        ``_score`` and ``explanations`` holds each hit's ``_explanation``
        when ``explain`` is true (otherwise it is empty).
    :raises AssertionError: if both ``use_title`` and ``use_tags`` are
        false.
    """
    assert use_title or use_tags, (
        'at least one of use_title and use_tags must be true'
    )
    if boost_title is None:
        boost_title = settings.RELATED_CONTENT_BOOST_TITLE
    if boost_tags is None:
        boost_tags = settings.RELATED_CONTENT_BOOST_TAGS
    if size is None:
        size = settings.RELATED_CONTENT_SIZE

    index = related.get_index()
    doc_type = 'event'
    es = related.get_connection()

    def more_like_this(field, boost):
        # One clause per field, all using this event as reference document.
        # Options tried on the title clause but left disabled:
        # 'analyzer': 'snowball', 'max_doc_freq': 2,
        # 'stop_words': ['your', 'about'].
        return {
            'more_like_this': {
                'fields': [field],
                'docs': [{
                    '_index': index,
                    '_type': doc_type,
                    '_id': event.id,
                }],
                'min_term_freq': 1,
                'max_query_terms': 20,
                'min_doc_freq': 1,
                'boost': boost,
            }
        }

    fields = ['title']
    # Only ask for the channel when the event is in something other than
    # exactly the default channel.
    if list(event.channels.all()) != [
        Channel.objects.get(slug=settings.DEFAULT_CHANNEL_SLUG)
    ]:
        fields.append('channel')

    mlt_queries = []
    if use_title:
        mlt_queries.append(more_like_this('title', boost_title))
    if use_tags and event.tags.all().exists():
        fields.append('tags')
        mlt_queries.append(more_like_this('tags', boost_tags))

    # Decide the privacy rule once so the search filter and the database
    # filter below cannot disagree.
    only_public = not user.is_active
    exclude_company = not only_public and is_contributor(user)

    query = {
        'fields': fields,
        'query': {
            'bool': {
                'should': mlt_queries,
            }
        },
    }
    if only_public:
        query['filter'] = {
            'bool': {
                'must': {'term': {'privacy': Event.PRIVACY_PUBLIC}}
            }
        }
    elif exclude_company:
        query['filter'] = {
            'bool': {
                'must_not': {'term': {'privacy': Event.PRIVACY_COMPANY}}
            }
        }
    query['from'] = 0
    query['size'] = size
    query['explain'] = explain

    hits = es.search(query, index=index)['hits']

    ids = []
    rank_by_id = {}  # hit id -> position in the search result
    scores = {}
    explanations = []
    for rank, doc in enumerate(hits['hits']):
        _id = int(doc['_id'])
        scores[_id] = doc['_score']
        ids.append(_id)
        rank_by_id.setdefault(_id, rank)
        if explain:
            explanations.append(doc['_explanation'])

    events = Event.objects.scheduled_or_processing().filter(id__in=ids)
    if only_public:
        events = events.filter(privacy=Event.PRIVACY_PUBLIC)
    elif exclude_company:
        events = events.exclude(privacy=Event.PRIVACY_COMPANY)

    # The database returns rows in its own order; restore search order
    # with a dict lookup rather than a list scan per event.
    events = sorted(events, key=lambda e: rank_by_id[e.id])
    return (events, scores, explanations)
def find_related_events(
    event, user,
    boost_title=None, boost_tags=None,
    size=None,
    use_title=True, use_tags=True,
    explain=False
):
    """Find events similar to ``event`` using Elasticsearch more_like_this.

    One ``more_like_this`` clause is built per enabled field (title, and
    tags when the event has any) and the clauses are combined in a
    ``bool``/``should`` query. Results are restricted by privacy
    according to ``user``, both in the search and in the database lookup
    that follows it.

    :param event: the event to find related events for; its ``id`` is
        used as the reference document.
    :param user: ``is_active`` and ``is_contributor(user)`` decide which
        privacy levels may be returned.
    :param boost_title: boost of the title clause; defaults to
        ``settings.RELATED_CONTENT_BOOST_TITLE``.
    :param boost_tags: boost of the tags clause; defaults to
        ``settings.RELATED_CONTENT_BOOST_TAGS``.
    :param size: maximum number of hits requested; defaults to
        ``settings.RELATED_CONTENT_SIZE``.
    :param use_title: include the title clause.
    :param use_tags: include the tags clause (only if the event has tags).
    :param explain: ask Elasticsearch for score explanations.
    :returns: ``(events, scores, explanations)`` where ``events`` is a
        list of events in search-hit order, ``scores`` maps hit id to
        ``_score`` and ``explanations`` holds each hit's ``_explanation``
        when ``explain`` is true (otherwise it is empty).
    :raises AssertionError: if both ``use_title`` and ``use_tags`` are
        false.
    """
    assert use_title or use_tags, (
        'at least one of use_title and use_tags must be true'
    )
    if boost_title is None:
        boost_title = settings.RELATED_CONTENT_BOOST_TITLE
    if boost_tags is None:
        boost_tags = settings.RELATED_CONTENT_BOOST_TAGS
    if size is None:
        size = settings.RELATED_CONTENT_SIZE

    index = related.get_index()
    doc_type = 'event'
    es = related.get_connection()

    def more_like_this(field, boost):
        # One clause per field, all using this event as reference document.
        # Options tried on the title clause but left disabled:
        # 'analyzer': 'snowball', 'max_doc_freq': 2,
        # 'stop_words': ['your', 'about'].
        return {
            'more_like_this': {
                'fields': [field],
                'docs': [{
                    '_index': index,
                    '_type': doc_type,
                    '_id': event.id,
                }],
                'min_term_freq': 1,
                'max_query_terms': 20,
                'min_doc_freq': 1,
                'boost': boost,
            }
        }

    fields = ['title']
    # Only ask for the channel when the event is in something other than
    # exactly the default channel.
    if list(event.channels.all()) != [
        Channel.objects.get(slug=settings.DEFAULT_CHANNEL_SLUG)
    ]:
        fields.append('channel')

    mlt_queries = []
    if use_title:
        mlt_queries.append(more_like_this('title', boost_title))
    if use_tags and event.tags.all().exists():
        fields.append('tags')
        mlt_queries.append(more_like_this('tags', boost_tags))

    # Decide the privacy rule once so the search filter and the database
    # filter below cannot disagree.
    only_public = not user.is_active
    exclude_company = not only_public and is_contributor(user)

    query = {
        'fields': fields,
        'query': {
            'bool': {
                'should': mlt_queries,
            }
        },
    }
    if only_public:
        query['filter'] = {
            'bool': {
                'must': {'term': {'privacy': Event.PRIVACY_PUBLIC}}
            }
        }
    elif exclude_company:
        query['filter'] = {
            'bool': {
                'must_not': {'term': {'privacy': Event.PRIVACY_COMPANY}}
            }
        }
    query['from'] = 0
    query['size'] = size
    query['explain'] = explain

    hits = es.search(query, index=index)['hits']

    ids = []
    rank_by_id = {}  # hit id -> position in the search result
    scores = {}
    explanations = []
    for rank, doc in enumerate(hits['hits']):
        _id = int(doc['_id'])
        scores[_id] = doc['_score']
        ids.append(_id)
        rank_by_id.setdefault(_id, rank)
        if explain:
            explanations.append(doc['_explanation'])

    events = Event.objects.scheduled_or_processing().filter(id__in=ids)
    if only_public:
        events = events.filter(privacy=Event.PRIVACY_PUBLIC)
    elif exclude_company:
        events = events.exclude(privacy=Event.PRIVACY_COMPANY)

    # The database returns rows in its own order; restore search order
    # with a dict lookup rather than a list scan per event.
    events = sorted(events, key=lambda e: rank_by_id[e.id])
    return (events, scores, explanations)