Ejemplo n.º 1
0
    def view(self, request):
        """Get statistics for the website.

        The response is serialized with ``StatisticsSerializerV2`` and holds:
        the number of certified users whose account is active, the number of
        expert ratings (pairwise comparisons), the number of videos, and the
        smallest/largest value found across all ``VIDEO_FIELDS`` of all videos.
        """
        minmax_scores = Video.objects.aggregate(
            **{'max_' + f: Max(F(f)) for f in VIDEO_FIELDS},
            **{'min_' + f: Min(F(f)) for f in VIDEO_FIELDS})

        # Min/Max aggregates are None when there is nothing to aggregate.
        # Skip those entries explicitly instead of relying on a blanket
        # ``except Exception`` around min()/max(); when no value is left
        # (no videos, or no fields) both scores fall back to 0.0.
        min_score = min(
            (v for k, v in minmax_scores.items()
             if k.startswith('min') and v is not None),
            default=0.0)
        max_score = max(
            (v for k, v in minmax_scores.items()
             if k.startswith('max') and v is not None),
            default=0.0)

        data = {
            'certified_experts':
            UserInformation._annotate_is_certified(
                UserInformation.objects.all()).filter(
                    _is_certified=1, user__is_active=True).count(),
            'pairwise_comparisons':
            ExpertRating.objects.all().count(),
            'videos':
            Video.objects.all().count(),
            'min_score':
            min_score,
            'max_score':
            max_score
        }

        return Response(StatisticsSerializerV2(data, many=False).data)
def sample_certified_users_with_video(video_pks, max_other_users=1, feature_nonan=None,
                                      always_include_username=None):
    """Get a set of usernames which have most ratings on a particular list of videos.

    Args:
        video_pks: primary keys of the videos of interest.
        max_other_users: maximal number of users to select, not counting
            ``always_include_username``.
        feature_nonan: optional feature name. When given, only expert ratings
            and video ratings with a non-null value for it are counted.
        always_include_username: optional username that is always added to the
            result (it is excluded from the selection of the other users).

    Returns:
        A set of usernames of certified users, selected by decreasing number of
        expert ratings involving the videos, plus ``always_include_username``.
    """
    qs = UserPreferences.objects.all()

    # removing myself to get more others (will add later)
    if always_include_username is not None:
        qs = qs.exclude(user__username=always_include_username)

    # filter for expert ratings: the video is on either side of the comparison
    # (starts from an always-false Q, so an empty video_pks matches nothing)
    expertrating_filter = Q(pk__in=[])
    for video_pk in video_pks:
        expertrating_filter = expertrating_filter | Q(expertrating__video_1__pk=video_pk)
        expertrating_filter = expertrating_filter | Q(expertrating__video_2__pk=video_pk)

    # exclude ratings with a NaN value
    if feature_nonan is not None:
        expertrating_filter = expertrating_filter & Q(
            **{'expertrating__' + feature_nonan + '__isnull': False})

    qs = UserInformation._annotate_is_certified(qs, prefix='user__userinformation__')
    qs = qs.filter(_is_certified=True)
    qs = qs.annotate(_rating_count=Count('expertrating',
                                         filter=expertrating_filter,
                                         distinct=True))

    filter_rating_exists = Q(pk__in=[])
    for video_pk in video_pks:
        videorating_conditions = {'videorating__video__pk': video_pk}
        # feature_nonan defaults to None: only require a non-null feature value
        # when a feature was actually requested (building the lookup name from
        # None would raise a TypeError)
        if feature_nonan is not None:
            videorating_conditions['videorating__' + feature_nonan + '__isnull'] = False

        annotation_name = f'_v_{video_pk}_nonnull'
        qs = qs.annotate(
            **{annotation_name: Count('videorating',
                                      filter=Q(**videorating_conditions))})
        filter_rating_exists = filter_rating_exists | Q(**{annotation_name + '__gt': 0})

    # at least one score must be present for any of the videos
    qs = qs.filter(filter_rating_exists)

    # expert ratings must exist
    qs = qs.filter(_rating_count__gt=0)

    # selecting by top ratings
    qs = qs.order_by('-_rating_count')[:max_other_users]

    usernames_set = set(qs.values_list('user__username', flat=True))

    if always_include_username is not None:
        usernames_set.add(always_include_username)

    return usernames_set
Ejemplo n.º 3
0
    def view(self, request):
        """Get statistics for the website.

        The response is serialized with ``StatisticsSerializerV2``. On top of
        the global counters (certified active users, expert ratings, videos,
        min/max score over all ``VIDEO_FIELDS``) it reports the number of
        non-demo users, the expert ratings edited during the last 7 days, the
        number of videos appearing in at least one expert rating, and, per
        field, the number of expert ratings with a value and a positive weight
        (``n_<field>_comparisons``) together with their sum.
        """
        minmax_scores = Video.objects.aggregate(
            **{'max_' + f: Max(F(f)) for f in VIDEO_FIELDS},
            **{'min_' + f: Min(F(f)) for f in VIDEO_FIELDS})

        # Min/Max aggregates are None when there is nothing to aggregate.
        # Skip those entries explicitly instead of relying on a blanket
        # ``except Exception`` around min()/max(); when no value is left
        # (no videos, or no fields) both scores fall back to 0.0.
        min_score = min(
            (v for k, v in minmax_scores.items()
             if k.startswith('min') and v is not None),
            default=0.0)
        max_score = max(
            (v for k, v in minmax_scores.items()
             if k.startswith('max') and v is not None),
            default=0.0)

        date_week_ago = make_aware(
            datetime.datetime.now()) - datetime.timedelta(days=7)

        data = {
            'certified_experts':
            UserInformation._annotate_is_certified(
                UserInformation.objects.all()).filter(
                    _is_certified=1, user__is_active=True).count(),
            'pairwise_comparisons':
            ExpertRating.objects.all().count(),
            'videos':
            Video.objects.all().count(),
            'min_score':
            min_score,
            'max_score':
            max_score,
            'total_experts':
            UserInformation.objects.filter(is_demo=False).count(),
            'weekly_active_ratings':
            ExpertRating.objects.filter(
                datetime_lastedit__gte=date_week_ago).count(),
            # a video is "rated" unless it is absent from both sides of every
            # expert rating
            'n_rated_videos':
            Video.objects.exclude(
                Q(expertrating_video_1__id=None)
                & Q(expertrating_video_2__id=None)).distinct().count()
        }

        # per-field number of ratings that have a value and a positive weight
        n_sum_comparisons = 0
        for f in VIDEO_FIELDS:
            val = ExpertRating.objects.filter(**{
                f + '__isnull': False,
                f + '_weight__gt': 0
            }).distinct().count()
            data[f"n_{f}_comparisons"] = val
            n_sum_comparisons += val

        data["n_sum_comparisons"] = n_sum_comparisons

        return Response(StatisticsSerializerV2(data, many=False).data)
Ejemplo n.º 4
0
    def thank_contributors(self, request):
        """Thank (or stop thanking) the contributors who rated a video.

        Query parameters:
            video_id: identifier of the video (404 if no such video).
            action: ``'thank'`` creates a ``VideoRatingThankYou`` from the
                requesting user to every other certified user with an expert
                rating involving the video; ``'unthank'`` deletes the thanks
                previously given by the requesting user for this video.

        Returns:
            201 with ``{'status': 'success'}`` after thanking, 201 with the
            number of deleted rows after un-thanking, 400 for any other
            action. 404 if the requesting user has no ``UserPreferences``.
        """

        video = get_object_or_404(Video,
                                  video_id=request.query_params.get(
                                      'video_id', ''))
        action = request.query_params.get('action', "")

        user = get_object_or_404(UserPreferences,
                                 user__username=request.user.username)

        if action == 'unthank':
            n_deleted, _ = VideoRatingThankYou.objects.filter(
                thanks_from=user, video=video).delete()
            return Response({
                'status': 'deleted',
                'n_deleted': n_deleted
            },
                            status=201)

        if action != 'thank':
            return Response({'reason': f'Wrong action [{action}]'}, status=400)

        qs = UserPreferences.objects.all()

        # only keeping people who rated the video.
        # The condition is passed as ``filter=`` explicitly: the second
        # positional argument of Count() is not the filter on every Django
        # version, so a positional Q could be silently taken for ``distinct``.
        qs = qs.annotate(n_video=Count(
            'expertrating',
            filter=Q(expertrating__video_1=video)
            | Q(expertrating__video_2=video)))
        qs = qs.filter(n_video__gte=1)

        # and who are certified
        qs = UserInformation._annotate_is_certified(
            qs, prefix="user__userinformation__")
        qs = qs.filter(_is_certified=True)

        # removing yourself...
        qs = qs.exclude(id=user.id)

        contributors = qs.distinct()

        entries = [
            VideoRatingThankYou(thanks_from=user,
                                video=video,
                                thanks_to=contributor)
            for contributor in contributors
        ]
        # ignore_conflicts: entries conflicting with existing rows are skipped
        VideoRatingThankYou.objects.bulk_create(entries,
                                                ignore_conflicts=True)

        return Response({'status': 'success'}, status=201)
Ejemplo n.º 5
0
    def search_username(self, request):
        """Search certified users by a case-insensitive part of their username.

        Query parameters:
            search_query: substring to look for in usernames (default: empty,
                which matches every username).
            limit: maximal number of results, a non-negative integer
                (default: 20).

        Returns:
            A single page (``previous``/``next`` are always None) with the
            matching entries ordered by username, or a 400 response with the
            reason when ``limit`` is not a non-negative integer.
        """
        search_query = request.query_params.get('search_query', '')
        try:
            limit = int(request.query_params.get('limit', 20))
            # explicit check rather than ``assert``: assertions are stripped
            # when Python runs with -O, which would let negative limits through
            if limit < 0:
                raise ValueError("Limit must be positive")
        except (TypeError, ValueError) as e:
            return Response({'reason': str(e)}, status=400)

        qs = UserInformation.objects.all()
        qs = UserInformation._annotate_is_certified(qs)
        qs = qs.filter(_is_certified=True)
        qs = qs.filter(user__username__icontains=search_query)
        qs = qs.order_by('user__username')
        qs = qs[:limit]
        serializer = OnlyUsernameSerializer(qs, many=True)
        return Response({
            'results': serializer.data,
            'count': len(qs),
            'previous': None,
            'next': None
        })
Ejemplo n.º 6
0
def get_public_append_only_database_as_pd():
    """Build the public append-only database as a dict of pandas dataframes.

    Keys of the returned dict:
        all_video_scores: every video with its annotated ``score`` and all
            ``VIDEO_FIELDS``.
        comparison_database: the rating history restricted to entries where
            both videos are annotated as public.
        contributors_public: non-demo users, with only the columns allowed by
            their ``show_my_profile`` / ``show_online_presence`` settings.
    """

    # a horrible hack to make django-pandas work with annotations
    # see https://github.com/chrisdev/django-pandas/blob/master/django_pandas/io.py
    # see https://github.com/chrisdev/django-pandas/issues/124
    # TODO: fix it
    import django
    django.db.models.fields.FieldDoesNotExist = django.core.exceptions.FieldDoesNotExist

    default_features = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]

    # --- videos: the tournesol score and all criteria ---
    scored_videos = Video.objects.all().annotate(
        score=get_score_annotation(default_features))
    video_df = read_frame(scored_videos,
                          fieldnames=['id', 'video_id', 'score'] + VIDEO_FIELDS)

    # --- comparisons: all history for ratings, with both videos rated publicly ---
    history_qs = HistoricalExpertRating.objects.all()
    for video_index in ('1', '2'):
        history_qs = VideoRatingPrivacy._annotate_privacy(
            history_qs,
            prefix=f'video_{video_index}__videoratingprivacy',
            output_prefix=f"_v{video_index}")
    history_qs = history_qs.filter(_v1_is_public=True, _v2_is_public=True)

    weight_columns = [x + '_weight' for x in VIDEO_FIELDS]
    comparison_columns = [
        'id', 'duration_ms', 'datetime_lastedit', 'datetime_add',
        *VIDEO_FIELDS,
        *weight_columns,
        'user__user__username',
        'video_1__video_id', 'video_2__video_id',
        'history_id', 'history_date', 'history_change_reason', 'history_type',
    ]
    comparison_df = read_frame(history_qs, fieldnames=comparison_columns)

    # --- contributors: all user data (without demo accounts) + _is_certified ---
    users = UserInformation._annotate_is_certified(
        UserInformation.objects.all().filter(is_demo=False))

    # Even if 'show my profile' is false, export 'username'.
    # If 'show my profile' is true, export 'First name',
    # 'Last name', 'Title', 'Bio',
    # If 'show online presence' is true, export 'Website',
    # 'Linkedin', 'Youtube', 'Google scholar', 'Orcid', 'Researchgate', 'Twitter'.
    # Do NOT share demographic data.
    basic_columns = UserInformation.BASIC_FIELDS + ['_is_certified']
    profile_columns = UserInformation.PROFILE_FIELDS
    online_columns = UserInformation.ONLINE_FIELDS

    # only username
    hidden_profiles_df = read_frame(
        users.filter(show_my_profile=False),
        fieldnames=basic_columns)

    # username and info
    profiles_df = read_frame(
        users.filter(show_my_profile=True, show_online_presence=False),
        fieldnames=basic_columns + profile_columns)

    # username, info and online fields
    online_profiles_df = read_frame(
        users.filter(show_my_profile=True, show_online_presence=True),
        fieldnames=basic_columns + profile_columns + online_columns)

    # all contributors, stacked row-wise
    contributors_df = pd.concat(
        [hidden_profiles_df, profiles_df, online_profiles_df],
        axis=0, ignore_index=True)

    return {
        'all_video_scores': video_df,
        'comparison_database': comparison_df,
        'contributors_public': contributors_df,
    }
Ejemplo n.º 7
0
def test_download_privacy_public_database(driver, django_db_blocker):
    """Test that public database is a zip archive, and it only contains public info.

    The archive is downloaded through the link shown on the website, every file
    in it is parsed as CSV, and then:
    * each exported comparison must involve two videos rated publicly by its user;
    * each exported contributor row must match the certification status in the
      database, leave hidden profile/online fields empty, and carry no
      protected field.
    """

    create_toy_data(django_db_blocker=django_db_blocker,
                    driver=driver, n_users=30, n_videos=100,
                    n_ratings=30)

    open_tournesol(driver)

    WebDriverWait(driver, TIME_WAIT).until(
        EC.presence_of_element_located((By.ID, "id_public_database_download")))

    # find_element(By.ID, ...) rather than find_element_by_id: the latter was
    # removed from recent Selenium releases, the former works on old and new ones
    link = driver.find_element(
        By.ID, 'id_public_database_download').get_attribute('href')

    data = get(link)
    assert data.ok
    assert data.content
    assert data.headers['content-type'] == 'application/zip'

    # reading dataframes
    zip_file = BytesIO(data.content)
    dfs = {}
    with zipfile.ZipFile(zip_file, 'r') as zf:
        for fileinfo in zf.infolist():
            content = zf.read(fileinfo).decode('ascii')
            dfs[fileinfo.filename] = pd.read_csv(StringIO(content))

    assert set(dfs) == {
        'comparison_database.csv', 'contributors_public.csv',
        'all_video_scores.csv'
    }, f"Wrong files in archive: {dfs.keys()}"

    # checking comparisons privacy
    df = dfs['comparison_database.csv']
    for _, row in df.iterrows():
        username = row['user__user__username']
        vid1 = row['video_1__video_id']
        vid2 = row['video_2__video_id']

        # both videos must be rated publicly!
        with django_db_blocker.unblock():
            # the rater is the same for both videos: look it up once per row
            up = UserPreferences.objects.get(user__username=username)
            for vid in [vid1, vid2]:
                qs = Video.objects.filter(video_id=vid)
                assert qs.count() == 1, (qs, qs.count())
                qs = VideoRatingPrivacy._annotate_privacy(qs, prefix="videoratingprivacy",
                                                          field_user=up)
                assert qs.count() == 1, (qs, qs.count())
                assert qs.get()._is_public, qs.values()

        print("Check for", username, vid1, vid2, "successful")

    # checking user information privacy
    df = dfs['contributors_public.csv']
    for _, row in df.iterrows():
        username = row['user__username']

        # checking certification status
        with django_db_blocker.unblock():
            qs = UserInformation.objects.filter(user__username=username)
            assert qs.count() == 1, qs
            qs = UserInformation._annotate_is_certified(qs)
            assert qs.count() == 1, qs
            ui = qs.get()
        assert ui._is_certified == row['_is_certified'], (dict(row), ui)

        # checking show_my_profile
        if not ui.show_my_profile:
            for f in UserInformation.PROFILE_FIELDS:
                assert pd.isna(row[f]), row[f]

        # checking online presence
        if not ui.show_online_presence or not ui.show_my_profile:
            for f in UserInformation.ONLINE_FIELDS:
                assert pd.isna(row[f]), row[f]

        # checking that protected fields are not included
        for f in UserInformation.PROTECTED_FIELDS:
            assert f not in row, (f, row)

        print("Check for", username, "successful")
Ejemplo n.º 8
0
def accessible_model_filter(queryset, username):
    """Restrict a queryset to the entries accessible in search.

    An entry is kept when it is not a demo account and has at least one public
    video, or when it belongs to the user named ``username``.
    """
    annotated = UserInformation._annotate_n_public_videos(queryset)

    has_public_videos = Q(is_demo=False) & Q(_n_public_videos__gte=1)
    is_requesting_user = Q(user__username=username)

    return annotated.filter(has_public_videos | is_requesting_user)