Beispiel #1
0
    def setUp(self):
        """Reset the related-content index before each test.

        Runs the parent ``setUp``, then calls ``related.delete`` and
        ``related.create`` on a fresh connection and finally issues a
        cluster health request for the index.
        """
        super(TestRelatedContent, self).setUp()

        connection = related.get_connection()
        related.delete(connection)
        related.create(connection)
        index_name = related.get_index()
        # wait for all: ask for at least "yellow" status with zero shards
        # still relocating, and give the cluster up to five minutes.
        health_options = {
            "index": index_name,
            "wait_for_status": "yellow",
            "wait_for_relocating_shards": 0,
            "timeout": "5m",
        }
        connection.health(**health_options)
Beispiel #2
0
    def setUp(self):
        """Prepare a freshly created related-content index for the test.

        After the parent ``setUp`` has run, the index is removed and created
        again through the ``related`` helpers, followed by a cluster health
        request that waits on the index.
        """
        super(TestRelatedContent, self).setUp()

        conn = related.get_connection()
        # Start from a clean slate: delete first, then create.
        related.delete(conn)
        related.create(conn)

        target_index = related.get_index()
        # Health request parameters: "yellow" status, no relocating shards
        # (wait for all), five minute timeout.
        conn.health(index=target_index,
                    wait_for_status='yellow',
                    wait_for_relocating_shards=0,
                    timeout='5m')
Beispiel #3
0
def related_content(request):
    """Show the related-content index status and handle re-index requests.

    On a valid POST the index is optionally deleted and re-created, a
    re-indexing run is issued through ``related.index`` and the client is
    redirected back to this page.  For any other request, or a POST whose
    form does not validate, the page is rendered with the form, the number
    of indexed documents and the number of scheduled-or-processing events.
    """
    es = related.get_connection()
    index = related.get_index()

    if request.method != 'POST':
        # Unbound form, pre-filled with a "since" value of 30 (minutes).
        form = forms.ReindexRelatedContentForm(initial={'since': 30})
    else:
        form = forms.ReindexRelatedContentForm(request.POST)
        if form.is_valid():
            data = form.cleaned_data
            if data['delete_and_recreate']:
                try:
                    related.delete(es)
                except pyelasticsearch.ElasticHttpNotFoundError:
                    # Nothing to delete; the index is created right below.
                    pass
                related.create(es)
                # A recreated index always gets a complete re-index.
                data['all'] = True

            minutes = data['since']
            since = datetime.timedelta(minutes=minutes) if minutes else None
            related.index(all=data['all'], since=since)
            messages.success(request, 'Re-indexing issued.')
            return redirect('manage:related_content')

    match_everything = {'query': {'match_all': {}}}
    try:
        count = es.count(match_everything, index=index)['count']
    except pyelasticsearch.ElasticHttpNotFoundError:
        # Shown in place of a number when the count request 404s.
        count = 'no'

    return render(request, 'manage/related_content.html', {
        'form': form,
        'count_indexed': count,
        'count_events': Event.objects.scheduled_or_processing().count(),
        'index_name': index,
    })
Beispiel #4
0
def related_content(request):
    """Render the related-content management page and process re-indexing.

    A POST with a valid form triggers ``related.index`` (after deleting and
    re-creating the index when ``delete_and_recreate`` is set) and redirects
    to ``manage:related_content``.  Every other request falls through to
    rendering ``manage/related_content.html``.
    """
    connection = related.get_connection()
    index_name = related.get_index()

    posted = request.method == 'POST'
    if posted:
        form = forms.ReindexRelatedContentForm(request.POST)
    else:
        defaults = {'since': 30}
        form = forms.ReindexRelatedContentForm(initial=defaults)

    if posted and form.is_valid():
        cleaned = form.cleaned_data
        if cleaned['delete_and_recreate']:
            try:
                related.delete(connection)
            except pyelasticsearch.ElasticHttpNotFoundError:
                # Deleting is best-effort; creation follows regardless.
                pass
            related.create(connection)
            # Force a full index run after recreating.
            cleaned['all'] = True

        # ``since`` is given in minutes; falsy values mean "no time window".
        window = None
        if cleaned['since']:
            window = datetime.timedelta(minutes=cleaned['since'])

        related.index(all=cleaned['all'], since=window)
        messages.success(request, 'Re-indexing issued.')
        return redirect('manage:related_content')

    try:
        response = connection.count(
            {'query': {'match_all': {}}},
            index=index_name
        )
        indexed = response['count']
    except pyelasticsearch.ElasticHttpNotFoundError:
        indexed = 'no'

    context = dict(
        form=form,
        count_indexed=indexed,
        count_events=Event.objects.scheduled_or_processing().count(),
        index_name=index_name,
    )
    return render(request, 'manage/related_content.html', context)
Beispiel #5
0
def _more_like_this_clause(field, index, doc_type, doc_id, boost):
    """Build one ``more_like_this`` clause matching *field* of a document."""
    # Other knobs that were experimented with here but are not sent:
    # 'analyzer', 'max_doc_freq' and 'stop_words'.
    return {
        'more_like_this': {
            'fields': [field],
            'docs': [{
                '_index': index,
                '_type': doc_type,
                '_id': doc_id,
            }],
            'min_term_freq': 1,
            'max_query_terms': 20,
            'min_doc_freq': 1,
            'boost': boost,
        }
    }


def find_related_events(event,
                        user,
                        boost_title=None,
                        boost_tags=None,
                        size=None,
                        use_title=True,
                        use_tags=True,
                        explain=False):
    """Find events similar to *event* that *user* is allowed to see.

    Args:
        event: The event to find related content for; its ``id`` is used as
            the reference document of the ``more_like_this`` clauses.
        user: The requesting user.  When ``user.is_active`` is false only
            ``Event.PRIVACY_PUBLIC`` events are returned; when
            ``is_contributor(user)`` is true ``Event.PRIVACY_COMPANY``
            events are excluded; otherwise no privacy restriction applies.
        boost_title: Boost for the title clause; defaults to
            ``settings.RELATED_CONTENT_BOOST_TITLE``.
        boost_tags: Boost for the tags clause; defaults to
            ``settings.RELATED_CONTENT_BOOST_TAGS``.
        size: Maximum number of hits requested; defaults to
            ``settings.RELATED_CONTENT_SIZE``.
        use_title: Include the title similarity clause.
        use_tags: Include the tags similarity clause (only applied when the
            event actually has tags).
        explain: Ask the search backend for score explanations.

    Returns:
        A ``(events, scores, explanations)`` tuple: the matching events as a
        list in search-hit order, a dict mapping event id to hit score, and
        the list of hit explanations (empty unless *explain* is true).
    """
    # NOTE: kept as an assert so callers see the same AssertionError as
    # before; be aware it is skipped when Python runs with -O.
    assert use_title or use_tags
    if boost_title is None:
        boost_title = settings.RELATED_CONTENT_BOOST_TITLE
    if boost_tags is None:
        boost_tags = settings.RELATED_CONTENT_BOOST_TAGS
    if size is None:
        size = settings.RELATED_CONTENT_SIZE
    index = related.get_index()
    doc_type = 'event'

    es = related.get_connection()

    fields = ['title']
    # Only ask for the channel field when the event is in something other
    # than exactly the default channel.
    default_channel = Channel.objects.get(slug=settings.DEFAULT_CHANNEL_SLUG)
    if list(event.channels.all()) != [default_channel]:
        fields.append('channel')

    mlt_queries = []
    if use_title:
        mlt_queries.append(_more_like_this_clause(
            'title', index, doc_type, event.id, boost_title
        ))
    if use_tags and event.tags.all().exists():
        fields.append('tags')
        mlt_queries.append(_more_like_this_clause(
            'tags', index, doc_type, event.id, boost_tags
        ))

    # Work out the privacy restriction once; it is applied both to the
    # search request and to the database queryset below.
    public_only = not user.is_active
    hide_company = not public_only and is_contributor(user)

    query = {
        'fields': fields,
        'query': {
            'bool': {
                'should': mlt_queries,
            }
        },
    }
    if public_only:
        query['filter'] = {
            'bool': {
                'must': {
                    'term': {
                        'privacy': Event.PRIVACY_PUBLIC
                    }
                }
            }
        }
    elif hide_company:
        query['filter'] = {
            'bool': {
                'must_not': {
                    'term': {
                        'privacy': Event.PRIVACY_COMPANY
                    }
                }
            }
        }
    query['from'] = 0
    query['size'] = size
    query['explain'] = explain
    hits = es.search(query, index=index)['hits']

    ids = []
    scores = {}
    explanations = []
    for doc in hits['hits']:
        _id = int(doc['_id'])
        scores[_id] = doc['_score']
        ids.append(_id)
        if explain:
            explanations.append(doc['_explanation'])

    events = Event.objects.scheduled_or_processing().filter(id__in=ids)
    if public_only:
        events = events.filter(privacy=Event.PRIVACY_PUBLIC)
    elif hide_company:
        events = events.exclude(privacy=Event.PRIVACY_COMPANY)

    # Restore search-hit order with an O(1) position lookup per event
    # (first occurrence wins, like ``list.index``).
    position = {}
    for rank, hit_id in enumerate(ids):
        position.setdefault(hit_id, rank)
    events = sorted(events, key=lambda e: position[e.id])

    return (events, scores, explanations)
Beispiel #6
0
def find_related_events(
    event, user, boost_title=None, boost_tags=None, size=None,
    use_title=True, use_tags=True, explain=False
):
    """Return events related to *event*, restricted to what *user* may see.

    Similarity is expressed as a ``bool``/``should`` query of up to two
    ``more_like_this`` clauses (title and tags) that use the event's own
    indexed document as the reference.

    Args:
        event: Reference event; ``event.id`` identifies its indexed document.
        user: Requesting user.  A user whose ``is_active`` is false only
            gets ``Event.PRIVACY_PUBLIC`` events; a user for whom
            ``is_contributor`` is true does not get
            ``Event.PRIVACY_COMPANY`` events; other users are unrestricted.
        boost_title: Title clause boost, default
            ``settings.RELATED_CONTENT_BOOST_TITLE``.
        boost_tags: Tags clause boost, default
            ``settings.RELATED_CONTENT_BOOST_TAGS``.
        size: Number of hits to request, default
            ``settings.RELATED_CONTENT_SIZE``.
        use_title: Whether to add the title clause.
        use_tags: Whether to add the tags clause (skipped if the event has
            no tags).
        explain: Whether to request and collect score explanations.

    Returns:
        Tuple ``(events, scores, explanations)``: list of events ordered like
        the search hits, dict of event id to score, and list of explanations
        (empty when *explain* is false).
    """
    # NOTE: an assert is stripped under ``python -O``; kept to preserve the
    # exception type callers may already rely on.
    assert use_title or use_tags
    if boost_title is None:
        boost_title = settings.RELATED_CONTENT_BOOST_TITLE
    if boost_tags is None:
        boost_tags = settings.RELATED_CONTENT_BOOST_TAGS
    if size is None:
        size = settings.RELATED_CONTENT_SIZE
    index = related.get_index()
    doc_type = 'event'

    es = related.get_connection()

    def similar_on(field, boost):
        # One more_like_this clause; both clauses differ only in the field
        # name and the boost.  (Untried/disabled options: 'analyzer',
        # 'max_doc_freq', 'stop_words'.)
        return {
            'more_like_this': {
                'fields': [field],
                'docs': [
                    {
                        '_index': index,
                        '_type': doc_type,
                        '_id': event.id
                    }],
                'min_term_freq': 1,
                'max_query_terms': 20,
                'min_doc_freq': 1,
                'boost': boost,
            }
        }

    fields = ['title']
    # The channel field is requested unless the event sits in exactly the
    # default channel and nothing else.
    if list(event.channels.all()) != [
            Channel.objects.get(slug=settings.DEFAULT_CHANNEL_SLUG)]:
        fields.append('channel')

    mlt_queries = []
    if use_title:
        mlt_queries.append(similar_on('title', boost_title))
    if use_tags and event.tags.all().exists():
        fields.append('tags')
        mlt_queries.append(similar_on('tags', boost_tags))

    # Evaluate the user's visibility once and reuse it for both the search
    # filter and the queryset restriction further down.
    if not user.is_active:
        search_filter = {
            "bool": {
                "must": {
                    "term": {"privacy": Event.PRIVACY_PUBLIC}
                }
            }
        }
        orm_restriction = ('filter', Event.PRIVACY_PUBLIC)
    elif is_contributor(user):
        search_filter = {
            'bool': {
                'must_not': {
                    'term': {
                        'privacy': Event.PRIVACY_COMPANY
                    }
                }
            }
        }
        orm_restriction = ('exclude', Event.PRIVACY_COMPANY)
    else:
        search_filter = None
        orm_restriction = None

    query = {
        'fields': fields,
        'query': {
            'bool': {
                'should': mlt_queries,
            }
        }
    }
    if search_filter is not None:
        query['filter'] = search_filter
    query['from'] = 0
    query['size'] = size
    query['explain'] = explain
    hits = es.search(query, index=index)['hits']

    ids = []
    scores = {}
    explanations = []
    for doc in hits['hits']:
        _id = int(doc['_id'])
        scores[_id] = doc['_score']
        ids.append(_id)
        if explain:
            explanations.append(doc['_explanation'])

    events = Event.objects.scheduled_or_processing().filter(id__in=ids)
    if orm_restriction is not None:
        method, privacy = orm_restriction
        if method == 'filter':
            events = events.filter(privacy=privacy)
        else:
            events = events.exclude(privacy=privacy)

    # Map each hit id to its first position so sorting back into hit order
    # does not rescan ``ids`` for every event.
    rank_of = {}
    for rank, hit_id in enumerate(ids):
        rank_of.setdefault(hit_id, rank)
    events = sorted(events, key=lambda e: rank_of[e.id])

    return (events, scores, explanations)