Exemple #1
0
    def get_top_raters(self, video):
        """Return the raters to show for `video`, annotated with rating privacy.

        The base queryset is:
          * empty when the video object cannot be resolved,
          * the single user named by the request's search username, if any,
          * otherwise the video's certified top raters.

        Each row gets `n_public_rating`: 1 when `_is_public` is true for that
        user/video pair, 0 otherwise.
        """
        video_obj = self.get_video_object(video)
        # the username lookup runs before the video check, so a
        # PermissionDenied raised by it propagates even for unknown videos
        username = search_username_from_request(self.context.get("request", {}))

        if not video_obj:
            raters = UserInformation.objects.none()
        else:
            raters = (UserInformation.objects.filter(user__username=username)
                      if username
                      else video_obj.certified_top_raters())

        # relation path from a UserInformation row to its privacy entries
        privacy_path = 'user__userpreferences__videoratingprivacy'

        # only privacy entries for this particular video are considered
        raters = VideoRatingPrivacy._annotate_privacy(
            qs=raters,
            prefix=privacy_path,
            field_user=None,
            filter_add={privacy_path + '__video': video_obj})

        # turn the boolean privacy flag into a 0/1 integer
        public_as_int = Case(
            When(_is_public=True, then=Value(1)),
            default=Value(0),
            output_field=IntegerField())

        return raters.annotate(n_public_rating=public_as_int)
Exemple #2
0
    def test_privacy(self):
        """Check `_is_public` for every stored-entry / default-value combination.

        With no privacy entry the annotation must equal `default_value`;
        with an entry it must equal the stored `is_public`, whatever the
        default is.
        """

        def annotated_videos(default_value):
            """All videos annotated with privacy for the test user."""
            return VideoRatingPrivacy._annotate_privacy(
                Video.objects.all(),
                prefix='videoratingprivacy',
                field_user=self.user_prefs,
                default_value=default_value)

        # (stored is_public, or None for "no entry") -> {default_value: expected}
        scenarios = [
            (None, {True: True, False: False}),
            (True, {True: True, False: True}),
            (False, {True: False, False: False}),
        ]

        for stored, expected_by_default in scenarios:
            # start every scenario from a clean privacy table
            VideoRatingPrivacy.objects.all().delete()
            if stored is not None:
                VideoRatingPrivacy.objects.create(video=self.video,
                                                  user=self.user_prefs,
                                                  is_public=stored)

            for default_value in (True, False):
                videos = annotated_videos(default_value)
                assert len(videos) == 1
                assert videos[0]._is_public is expected_by_default[default_value]
    def video_rating_statistics(self, request):
        """Get statistical data on video ratings.

        Builds a queryset of video ratings (after the view's filters) and
        annotates each row with:
          * `score`: score for the requesting user's preference vector,
            adjusted by the query parameters,
          * `_is_public_or_myself`: rating is public or belongs to the
            requesting user,
          * `n_comparisons`: comparisons by the rating's user involving the
            rating's video (as first or second video),
          * `public_username`: the username when `_is_public_or_myself`
            holds, otherwise NULL.

        Returns a paginated response when pagination applies, a plain
        response otherwise.
        """
        ratings = self.filter_queryset(VideoRating.objects.all())

        # score of every rating under the (possibly overridden) preferences
        prefs_vector = update_preferences_vector_from_request(
            get_user_preferences(self.request).features_as_vector_centered,
            self.request.query_params)
        ratings = ratings.annotate(score=get_score_annotation(prefs_vector))

        # public/private flag of the rating, from the rating owner's settings
        ratings = VideoRatingPrivacy._annotate_privacy(
            ratings, prefix='video__videoratingprivacy', field_user=F('user'),
            default_value=None, annotate_bool=True, annotate_n=False)

        # a rating is visible if it is public or belongs to the requester
        visible_to_requester = Case(
            When(_is_public=True, then=Value(True)),
            When(user__user__username=request.user.username, then=Value(True)),
            default=Value(False),
            output_field=BooleanField(),
        )
        ratings = ratings.annotate(_is_public_or_myself=visible_to_requester)

        # comparisons by this user where the video is in slot 1, then slot 2
        for slot in ('1', '2'):
            relation = f'video__expertrating_video_{slot}'
            same_user = Q(**{f'{relation}__user': F('user')})
            ratings = ratings.annotate(**{
                f'n_cmp_{slot}': Count(relation, distinct=True, filter=same_user),
            })

        ratings = ratings.annotate(n_comparisons=F('n_cmp_1') + F('n_cmp_2'))

        # expose the username only for visible ratings
        shown_username = Case(
            When(_is_public_or_myself=True, then=F('user__user__username')),
            default=Value(None),
            output_field=CharField())
        ratings = ratings.annotate(public_username=shown_username)

        # deterministic ordering (required for stable pagination)
        ratings = ratings.order_by('pk')

        page = self.paginate_queryset(ratings)
        if page is None:
            return Response(
                VideoRatingsStatisticsSerializerV2(ratings, many=True).data)

        return self.get_paginated_response(
            VideoRatingsStatisticsSerializerV2(page, many=True).data)
Exemple #4
0
def search_username_from_request(request):
    """Get the username to use the scores from.

    Reads the `search_model` query parameter of `request`.

    Args:
        request: request-like object. It is only inspected when it has a
            dict-like `query_params` attribute; `request.user.username`
            is read when `search_model` is non-empty.

    Returns:
        The requested username (str), or `False` when the request has no
        usable `query_params` or no `search_model` value.

    Raises:
        PermissionDenied: when the requested username is not the
            requester's own and that user has no public video rating.
    """
    query_params = getattr(request, 'query_params', None)
    if not isinstance(query_params, dict):
        return False

    username = query_params.get('search_model', None)
    if not username:
        return False

    # Users may always search with their own model, so the database is
    # only queried when somebody else's model is requested.
    if username != request.user.username:
        qs = VideoRating.objects.filter(user__user__username=username)
        qs = VideoRatingPrivacy._annotate_privacy(qs=qs)
        # existence is all that matters here; no need for a full COUNT
        if not qs.filter(_is_public=True).exists():
            raise PermissionDenied()

    return username
Exemple #5
0
def get_public_append_only_database_as_pd():
    """Get the public append-only database.

    Returns a dict of pandas DataFrames with the keys:
      * 'all_video_scores': every video with its score (default
        preferences) and all criteria,
      * 'comparison_database': rating history rows where both videos are
        rated publicly,
      * 'contributors_public': non-demo users, with the exported columns
        depending on their profile visibility settings.
    """

    # a horrible hack to make django-pandas work with annotations
    # see https://github.com/chrisdev/django-pandas/blob/master/django_pandas/io.py
    # see https://github.com/chrisdev/django-pandas/issues/124
    # TODO: fix it
    import django
    django.db.models.fields.FieldDoesNotExist = django.core.exceptions.FieldDoesNotExist

    tables = {}

    # --- videos: score under default preferences plus every criterion ---
    default_prefs = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]
    scored_videos = Video.objects.all().annotate(
        score=get_score_annotation(default_prefs))
    tables['all_video_scores'] = read_frame(
        scored_videos,
        fieldnames=['id', 'video_id', 'score'] + VIDEO_FIELDS)

    # --- comparisons: full history, both videos rated publicly ---
    history = HistoricalExpertRating.objects.all()
    for slot in ('1', '2'):
        history = VideoRatingPrivacy._annotate_privacy(
            history,
            prefix=f'video_{slot}__videoratingprivacy',
            output_prefix=f"_v{slot}")
    history = history.filter(_v1_is_public=True, _v2_is_public=True)

    weight_columns = [name + '_weight' for name in VIDEO_FIELDS]
    comparison_columns = [
        'id', 'duration_ms', 'datetime_lastedit', 'datetime_add',
        *VIDEO_FIELDS,
        *weight_columns,
        'user__user__username',
        'video_1__video_id', 'video_2__video_id',
        'history_id', 'history_date', 'history_change_reason', 'history_type',
    ]
    tables['comparison_database'] = read_frame(
        history, fieldnames=comparison_columns)

    # --- contributors: all users except demo accounts, with _is_certified ---
    users = UserInformation.objects.all().filter(is_demo=False)
    users = UserInformation._annotate_is_certified(users)

    # Even if 'show my profile' is false, export 'username'.
    # If 'show my profile' is true, export 'First name',
    # 'Last name', 'Title', 'Bio',
    # If 'show online presence' is true, export 'Website',
    # 'Linkedin', 'Youtube', 'Google scholar', 'Orcid', 'Researchgate', 'Twitter'.
    # Do NOT share demographic data.
    basic_cols = UserInformation.BASIC_FIELDS + ['_is_certified']
    profile_cols = UserInformation.PROFILE_FIELDS
    online_cols = UserInformation.ONLINE_FIELDS

    # (visibility filter, exported columns), from least to most visible
    visibility_tiers = [
        (dict(show_my_profile=False),
         basic_cols),
        (dict(show_my_profile=True, show_online_presence=False),
         basic_cols + profile_cols),
        (dict(show_my_profile=True, show_online_presence=True),
         basic_cols + profile_cols + online_cols),
    ]
    tier_frames = [
        read_frame(users.filter(**visibility), fieldnames=columns)
        for visibility, columns in visibility_tiers
    ]

    # all contributors in one frame
    tables['contributors_public'] = pd.concat(
        tier_frames, axis=0, ignore_index=True)

    return tables
Exemple #6
0
def test_download_privacy_public_database(driver, django_db_blocker):
    """Test that public database is a zip archive, and it only contains public info."""

    create_toy_data(django_db_blocker=django_db_blocker,
                    driver=driver, n_users=30, n_videos=100,
                    n_ratings=30)

    open_tournesol(driver)

    # wait for the download link to show up, then fetch its target
    download_id = "id_public_database_download"
    WebDriverWait(driver, TIME_WAIT).until(
        EC.presence_of_element_located((By.ID, download_id)))
    link = driver.find_element_by_id('id_public_database_download').get_attribute('href')

    data = get(link)
    assert data.ok
    assert data.content
    assert data.headers['content-type'] == 'application/zip'

    # every archive member is parsed as an ASCII CSV, keyed by file name
    with zipfile.ZipFile(BytesIO(data.content), 'r') as zf:
        dfs = {
            member.filename: pd.read_csv(StringIO(zf.read(member).decode('ascii')))
            for member in zf.infolist()
        }

    expected_files = {
        'comparison_database.csv',
        'contributors_public.csv',
        'all_video_scores.csv',
    }
    assert set(dfs) == expected_files, f"Wrong files in archive: {dfs.keys()}"

    # --- comparisons: both videos of every exported row must be public ---
    comparisons = dfs['comparison_database.csv']
    for _, row in comparisons.iterrows():
        username = row['user__user__username']
        vid1 = row['video_1__video_id']
        vid2 = row['video_2__video_id']

        with django_db_blocker.unblock():
            for vid in (vid1, vid2):
                qs = Video.objects.filter(video_id=vid)
                assert qs.count() == 1, (qs, qs.count())
                up = UserPreferences.objects.get(user__username=username)
                qs = VideoRatingPrivacy._annotate_privacy(
                    qs, prefix="videoratingprivacy", field_user=up)
                assert qs.count() == 1, (qs, qs.count())
                assert qs.get()._is_public, qs.values()

        print("Check for", username, vid1, vid2, "successful")

    # --- contributors: exported columns must respect the user's settings ---
    contributors = dfs['contributors_public.csv']
    for _, row in contributors.iterrows():
        username = row['user__username']

        # certification status in the export must match the database
        with django_db_blocker.unblock():
            qs = UserInformation.objects.filter(user__username=username)
            assert qs.count() == 1, qs
            qs = UserInformation._annotate_is_certified(qs)
            assert qs.count() == 1, qs
            ui = qs.get()
        assert ui._is_certified == row['_is_certified'], (dict(row), ui)

        # hidden profile -> profile columns must be empty
        if not ui.show_my_profile:
            for f in UserInformation.PROFILE_FIELDS:
                assert pd.isna(row[f]), row[f]

        # online presence requires both flags; otherwise columns are empty
        if not (ui.show_online_presence and ui.show_my_profile):
            for f in UserInformation.ONLINE_FIELDS:
                assert pd.isna(row[f]), row[f]

        # protected fields must never be exported
        for f in UserInformation.PROTECTED_FIELDS:
            assert f not in row, (f, row)

        print("Check for", username, "successful")
Exemple #7
0
    def get_queryset(self, pk=None):
        """Return listed videos annotated with the scores used for ranking.

        Starts from all videos with `is_unlisted=False`. When scores for a
        specific username are requested (`self.need_scores_for_username()`),
        the criteria values are taken from that user's ratings, and the
        result can be restricted to publicly rated videos:
          * for somebody else's model: always public only,
          * for the requester's own model: public only when the query
            parameter `show_all_my_videos` is 'false'.

        Every row is annotated with `score_preferences_term`,
        `tournesol_score`, `score_search_term` and the final `score`
        (preferences term + search term).

        Args:
            pk: unused; kept for interface compatibility.

        Returns:
            A `values()` queryset of videos.
        """
        queryset = Video.objects.filter(is_unlisted=False).values()
        request = self.request

        # all model fields except the criteria, which are annotated
        # separately when using a specific user's ratings
        fields = [x.name for x in Video._meta.fields]
        for f in VIDEO_FIELDS:
            fields.remove(f)

        def get_score_annotation(user_preferences_vector):
            """Returns an sql object annotating queries with the video ratings (scalar product)."""
            return sum(
                [F(f) * v for f, v in zip(VIDEO_FIELDS, user_preferences_vector)])

        features = self.get_features_from_request()
        default_features = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]
        search_username = self.need_scores_for_username()

        # computing score inside the database
        if search_username:
            fields_exclude = set(Video.COMPUTED_PROPERTIES)
            fields = [f for f in fields if f not in fields_exclude]

            # criteria values come from the selected user's ratings
            queryset = queryset.values(*fields)
            queryset = queryset.annotate(**{key: F(f'videorating__{key}') for key in VIDEO_FIELDS},
                                         user=F(
                                             'videorating__user__user__username')).filter(
                user=search_username)

            # for myself, allow showing public/non-public videos
            if search_username == request.user.username:
                only_public = request.query_params.get('show_all_my_videos', 'true') == 'false'
            else:  # for other people, only show public videos
                only_public = True

            # keeping only public videos
            if only_public:
                queryset = VideoRatingPrivacy._annotate_privacy(
                    queryset, prefix='videoratingprivacy', field_user=None,
                    filter_add={'videoratingprivacy__user__user__username': search_username}
                )
                queryset = queryset.filter(_is_public=True)

            queryset = queryset.annotate(rating_n_experts=Value(1, IntegerField()))

            # number of comparisons by the selected user involving the video,
            # counted separately for each slot of the comparison
            q1 = Q(expertrating_video_1__user__user__username=search_username)
            q2 = Q(expertrating_video_2__user__user__username=search_username)

            # `filter` is passed by keyword: its positional slot in Count()
            # differs between Django versions
            c1 = Count('expertrating_video_1', filter=q1, distinct=True)
            c2 = Count('expertrating_video_2', filter=q2, distinct=True)

            queryset = queryset.annotate(rating_n_ratings=c1 + c2)

            queryset = queryset.annotate(n_public_experts=Value(1, IntegerField()))
            queryset = queryset.annotate(n_private_experts=Value(0, IntegerField()))

            # TODO: a hack. improve this
            queryset = queryset.annotate(
                    public_experts=Value("", CharField()))

            # logging model usage in search
            if self.request.user.is_authenticated:
                RepresentativeModelUsage.objects.get_or_create(
                    viewer=UserPreferences.objects.get(user__username=self.request.user.username),
                    model=UserPreferences.objects.get(user__username=search_username)
                )

        queryset = queryset.annotate(
            score_preferences_term=get_score_annotation(features))

        queryset = queryset.annotate(
            tournesol_score=get_score_annotation(default_features))

        # default search term; overwritten below when a search is requested
        queryset = queryset.annotate(
            score_search_term_=Value(
                0.0, FloatField()))

        if request.query_params.get('search'):
            # computing the postgres score for search
            if connection.vendor.startswith('postgres'):
                s_query = request.query_params.get('search', '')

                def word_to_query(w):
                    """Convert one word into a query (exact match OR prefix match)."""
                    queries = []

                    queries.append(SearchQuery(w, search_type='raw'))
                    queries.append(SearchQuery(w + ':*', search_type='raw'))

                    return reduce(lambda x, y: x | y, queries)

                def words_to_query(s_query, max_len=100, max_word_len=20):
                    """Convert a string with words into a SearchQuery (AND of all words)."""
                    s_query = s_query[:max_len]
                    s_query = s_query.split(' ')
                    # keep alphanumeric characters only, since the words are
                    # used in raw search queries
                    s_query = [''.join(filter(str.isalnum, x)) for x in s_query]
                    s_query = [x for x in s_query if 1 <= len(x) <= max_word_len]
                    s_query = [word_to_query(x) for x in s_query]
                    if not s_query:
                        return SearchQuery('')
                    return reduce(lambda x, y: x & y, s_query)

                s_query = words_to_query(s_query)

                s_vectors = [SearchVector(f, weight=w) for f, w in zip(self.search_fields,
                                                                       self.search_weights)]
                s_vector = reduce(lambda x, y: x + y, s_vectors)

                queryset = queryset.annotate(
                    score_search_term_=SearchRank(s_vector, s_query))
            else:
                # in other databases, using basic filtering
                queryset = filters_.SearchFilter().filter_queryset(self.request, queryset, self)
                queryset = queryset.annotate(
                    score_search_term_=Value(
                        1.0, FloatField()))

        queryset = queryset.annotate(
            score_search_term=F('score_search_term_') *
            VideoSearchEngine.VIDEO_SEARCH_COEFF)
        queryset = queryset.annotate(
            score=F('score_preferences_term') +
            F('score_search_term'))

        return queryset