Beispiel #1
0
    def test_08_dataset_counts_all(self):
        """08 - Test all dataset counts by month"""
        # The docstring doubles as the progress line printed for this test.
        print(self.test_08_dataset_counts_all.__doc__)

        # Expected content of the earliest and the latest monthly record.
        expected_first = {
            'yyyy_mm': '2015-04',
            'year_num': 2015,
            'month_num': 4,
            'month_name': 'April',
            'month_name_short': 'Apr',
            'count': 36,
            'running_total': 36,
        }
        expected_last = {
            'yyyy_mm': '2016-07',
            'year_num': 2016,
            'month_num': 7,
            'month_name': 'July',
            'month_name_short': 'Jul',
            'count': 98,
            'running_total': 570,
        }

        monthly_stats = StatsMakerDatasets().get_dataset_counts_by_create_date()
        records = monthly_stats.result_data['records']

        # One record per month is expected: 16 in total.
        self.assertEqual(len(records), 16)

        # Records are compared as plain dicts, first month then last month.
        self.assertEqual(dict(records[0]), expected_first)
        self.assertEqual(dict(records[-1]), expected_last)
Beispiel #2
0
    def test_08_dataset_counts_all(self):
        """08 - Test all dataset counts by month"""
        # Announce the running test by printing its own docstring.
        print(self.test_08_dataset_counts_all.__doc__)

        result = StatsMakerDatasets().get_dataset_counts_by_create_date()
        monthly_records = result.result_data['records']

        # Number of monthly records in the result.
        self.assertEqual(len(monthly_records), 16)

        # (index into the records, expected content of that record):
        # the first month is checked before the last one.
        boundary_months = (
            (0, {'count': 36,
                 'month_name': 'April',
                 'month_name_short': 'Apr',
                 'month_num': 4,
                 'running_total': 36,
                 'year_num': 2015,
                 'yyyy_mm': '2015-04'}),
            (-1, {'count': 98,
                  'month_name': 'July',
                  'month_name_short': 'Jul',
                  'month_num': 7,
                  'running_total': 570,
                  'year_num': 2016,
                  'yyyy_mm': '2016-07'}),
        )
        for position, expected_record in boundary_months:
            self.assertEqual(dict(monthly_records[position]), expected_record)
    def get_stats_result(self, request):
        """Build the dataset-count result matching the requested publication state.

        The GET parameters of `request` are passed as keyword arguments to
        StatsMakerDatasets. The publication state from
        `self.get_pub_state(request)` selects the count over all datasets,
        over unpublished datasets, or (for any other state) over published
        datasets. The selected result is returned unchanged.
        """
        dataset_stats = StatsMakerDatasets(**request.GET.dict())
        requested_state = self.get_pub_state(request)

        if requested_state == self.PUB_STATE_ALL:
            return dataset_stats.get_dataset_count()

        if requested_state == self.PUB_STATE_UNPUBLISHED:
            return dataset_stats.get_dataset_count_unpublished()

        # Any other state falls back to the published-only count.
        return dataset_stats.get_dataset_count_published()
Beispiel #4
0
    def get_stats_result(self, request):
        """Build the dataset subject counts for the requested publication state.

        StatsMakerDatasets is constructed from the GET parameters of
        `request`. The state returned by `self.get_pub_state(request)`
        decides which subject-count method is called: all datasets,
        unpublished datasets, or (for any other state) published datasets.
        """
        subject_stats = StatsMakerDatasets(**request.GET.dict())
        requested_state = self.get_pub_state(request)

        # Pick the counting method first, then invoke it once at the end.
        if requested_state == self.PUB_STATE_ALL:
            count_subjects = subject_stats.get_dataset_subject_counts
        elif requested_state == self.PUB_STATE_UNPUBLISHED:
            count_subjects = subject_stats.get_dataset_subject_counts_unpublished
        else:
            count_subjects = subject_stats.get_dataset_subject_counts_published

        return count_subjects()
Beispiel #5
0
def get_total_published_counts():
    """Collect the published totals for dataverses, datasets, files and downloads.

    Returns a dict with the keys ``total_dataverses``, ``total_datasets``,
    ``total_files`` and ``total_downloads``. Each value is the ``'count'``
    entry of the ``result_data`` of the corresponding stats call. The
    download total is requested with ``include_pre_dv4_downloads=True``.
    """
    dataverse_stats = StatsMakerDataverses()
    dataset_stats = StatsMakerDatasets()
    file_stats = StatsMakerFiles()

    # Queries run in the same order as the keys of the returned dict.
    dataverse_total = dataverse_stats.get_dataverse_count_published()
    dataset_total = dataset_stats.get_dataset_count_published()
    file_total = file_stats.get_datafile_count_published()
    download_total = file_stats.get_total_file_downloads(
        include_pre_dv4_downloads=True)

    return {
        'total_dataverses': dataverse_total.result_data['count'],
        'total_datasets': dataset_total.result_data['count'],
        'total_files': file_total.result_data['count'],
        'total_downloads': download_total.result_data['count'],
    }
Beispiel #6
0
    def test_05_dataset_total_counts(self):
        """05 - Count total datasets: published, unpublished, all"""
        # The docstring doubles as the progress line printed for this test.
        print(self.test_05_dataset_total_counts.__doc__)

        # All three counts come from one maker built with the same start_date.
        dataset_stats = StatsMakerDatasets(start_date='2016-01-01')

        # Published datasets
        published = dataset_stats.get_dataset_count_published()
        self.assertEqual(published.result_data['count'], 85)

        # Unpublished datasets
        unpublished = dataset_stats.get_dataset_count_unpublished()
        self.assertEqual(unpublished.result_data['count'], 198)

        # Every dataset, regardless of publication state
        overall = dataset_stats.get_dataset_count()
        self.assertEqual(overall.result_data['count'], 283)
Beispiel #7
0
    def test_05_dataset_total_counts(self):
        """05 - Count total datasets: published, unpublished, all"""
        # Announce the running test by printing its own docstring.
        print(self.test_05_dataset_total_counts.__doc__)

        stats_maker = StatsMakerDatasets(start_date='2016-01-01')

        # (StatsMakerDatasets method to call, expected 'count' value),
        # checked in order: published, unpublished, all datasets.
        expected_counts = (
            ('get_dataset_count_published', 85),
            ('get_dataset_count_unpublished', 198),
            ('get_dataset_count', 283),
        )
        for method_name, expected_count in expected_counts:
            stats_result = getattr(stats_maker, method_name)()
            self.assertEqual(stats_result.result_data['count'], expected_count)
Beispiel #8
0
def _max_count_label(records, noncumulative):
    """Return the largest 'count' found in `records` as a string, or None.

    None is returned when `noncumulative` is false or `records` is empty.
    """
    if noncumulative and records:
        return str(max(item['count'] for item in records))
    return None


def _view_easy_visualizations(request, kwargs):
    """Render the EASY metrics page, or return its Excel export.

    `kwargs` is the dict of URL keyword arguments of the view. It is
    updated in place with the form values (or their defaults) and then
    used as the filter arguments for the StatsMaker classes.
    """
    if request.method == "POST":
        form = Metrics(request.POST)
    else:
        form = Metrics()

    # Form values (or their defaults) always replace same-named URL kwargs.
    kwargs["category"] = form.data.get("category", "audience")
    kwargs["start_date"] = form.data.get("start_date", "2008-01-01")
    kwargs["end_date"] = form.data.get("end_date", "2017-12-31")
    kwargs["cumulative"] = form.data.get("cumulative", "cumulative_period")
    kwargs["downloads"] = form.data.get("downloads", "files")
    kwargs["date_type"] = form.data.get("date_type", "publish")
    kwargs["bulk_import_included"] = form.data.get("bulk_import_included", "bulk_included")
    noncumulative = kwargs["cumulative"] == 'noncumulative'

    if form.data.get('excel'):
        parameters = OrderedDict()
        parameters['start date'] = kwargs["start_date"]
        parameters['end date'] = kwargs["end_date"]
        parameters['date type'] = kwargs["date_type"]
        parameters['cumulative'] = kwargs["cumulative"]
        parameters['download type'] = kwargs["downloads"]
        parameters['bulk import'] = kwargs["bulk_import_included"]
        # NOTE(review): `respdict` is not defined in this function; it is
        # presumably a module-level dict filled by an earlier page render.
        # If these keys are missing, this raises KeyError - confirm that is
        # acceptable for an Excel request.
        graphs = [{'data': respdict['dataset_counts_by_month'], 'name': 'Dataset counts'},
                  {'data': respdict['file_counts_by_month'], 'name': 'File counts'},
                  {'data': respdict['file_downloads_by_month'], 'name': 'Download counts'}]
        return get_easy_excel_sheets(parameters, graphs, 'Easy metrics.xlsx')

    # kwargs was populated above and is therefore never empty here, so the
    # GET parameters are never consulted in EASY mode.
    stats_datasets = StatsMakerDatasets(**kwargs)
    stats_files = StatsMakerFiles(**kwargs)

    # NOTE(review): this aliases the externally defined `respdict` rather
    # than copying it, so the assignments below modify that object.
    resp_dict = respdict

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(stats_monthly_ds_counts.result_data['records'])
        resp_dict['max_count_datasets'] = _max_count_label(
            resp_dict['dataset_counts_by_month'], noncumulative)

    # --------------------
    # Datasets by category
    # --------------------
    stats_ds_count_by_category = stats_datasets.get_dataset_category_counts_published()
    if not stats_ds_count_by_category.has_error():
        resp_dict['category'] = stats_datasets.get_category().capitalize()
        resp_dict['dataset_counts_by_category'] = stats_ds_count_by_category.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(stats_monthly_file_counts.result_data['records'])
        resp_dict['max_count_files'] = _max_count_label(
            resp_dict['file_counts_by_month'], noncumulative)

    # ------------------------------------------------------
    # Datasets (or just one file of it) downloaded, by month
    # ------------------------------------------------------
    stats_monthly_downloads = stats_files.get_file_downloads_by_month_published(include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(stats_monthly_downloads.result_data['records'])
        resp_dict['max_count_downloads'] = _max_count_label(
            resp_dict['file_downloads_by_month'], noncumulative)

    resp_dict['form'] = form
    return render(request, 'metrics/metrics_easy.html', resp_dict)


def _view_dataverse_visualizations(request, kwargs):
    """Render the Dataverse public metrics page.

    `kwargs` is the dict of URL keyword arguments of the view. When it is
    non-empty it is used instead of the GET parameters as the filter
    arguments for the StatsMaker classes.
    """
    # kwargs override GET parameters
    stats_params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**stats_params)
    stats_dvs = StatsMakerDataverses(**stats_params)
    stats_files = StatsMakerFiles(**stats_params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # -------------------------
    # Dataverses created each month
    # -------------------------
    stats_result_dv_counts = stats_dvs.get_dataverse_counts_by_month_published()
    if not stats_result_dv_counts.has_error():
        resp_dict['dataverse_counts_by_month'] = list(stats_result_dv_counts.result_data['records'])
        resp_dict['dataverse_counts_by_month_sql'] = stats_result_dv_counts.sql_query

    # -------------------------
    # Dataverse counts by type
    # -------------------------
    stats_result_dv_counts_by_type = stats_dvs.get_dataverse_counts_by_type_published(
        exclude_uncategorized=True)
    if not stats_result_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = stats_result_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = stats_result_dv_counts_by_type.sql_query

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(stats_monthly_ds_counts.result_data['records'])
        resp_dict['dataset_counts_by_month_sql'] = stats_monthly_ds_counts.sql_query

    # -------------------------
    # Datasets by category
    # -------------------------
    stats_ds_count_by_category = stats_datasets.get_dataset_category_counts_published()
    if not stats_ds_count_by_category.has_error():
        resp_dict['category'] = stats_datasets.get_category().capitalize()
        resp_dict['dataset_counts_by_category'] = stats_ds_count_by_category.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(stats_monthly_file_counts.result_data['records'])
        resp_dict['file_counts_by_month_sql'] = stats_monthly_file_counts.sql_query

    # -------------------------
    # Files downloaded, by month
    # -------------------------
    stats_monthly_downloads = stats_files.get_file_downloads_by_month_published(include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(stats_monthly_downloads.result_data['records'])
        resp_dict['file_downloads_by_month_sql'] = stats_monthly_downloads.sql_query

    # -------------------------
    # File counts by content type
    # -------------------------
    # rp: removing this from current charts. Disabled code kept for reference:
    #
    # stats_file_content_types = stats_files.get_datafile_content_type_counts_published()
    # if not stats_file_content_types.has_error():
    #     resp_dict['file_content_types'] = list(stats_file_content_types.result_data)
    #     resp_dict['file_content_types_sql'] = stats_file_content_types.sql_query
    #     resp_dict['file_content_types_top_20'] = list(stats_file_content_types.result_data)[:20]
    #     #resp_dict['file_content_types_json'] = json.dumps(file_content_types, indent=4)

    return render(request, 'metrics/metrics_public.html', resp_dict)


def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics

    When EASY_STATISTICS is truthy the EASY metrics page is served: the
    filter values come from the Metrics form, and an Excel export is
    returned instead of HTML when the form data contains 'excel'.
    Otherwise the Dataverse metrics page is served, with the URL kwargs
    (if any) used instead of the GET parameters.
    """
    if EASY_STATISTICS:
        return _view_easy_visualizations(request, kwargs)
    return _view_dataverse_visualizations(request, kwargs)
Beispiel #9
0
    sys.path.append(proj_path)
    #sys.path.append(dirname(proj_path))
    django.setup()
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "miniverse.settings.local")
    EASY_STATISTICS = 'true'

import random
import numpy as np
import pandas as pd
from django.db.models import F

from dv_apps.datasets.models import *
from dv_apps.datafiles.models import FileMetadata
from dv_apps.metrics.stats_util_base import StatsMakerBase
from dv_apps.metrics.stats_view_base import StatsViewSwagger
from dv_apps.metrics.stats_util_datasets import StatsMakerDatasets
from dv_apps.metrics.stats_util_files import StatsMakerFiles
# get_easy_dataset_category_counts

# Ad-hoc check script: build the stats makers and print a few EASY
# statistics to stdout.
miniset = StatsMakerBase()
datasets = StatsMakerDatasets()
files = StatsMakerFiles()

# Each print(...) takes a single parenthesised argument, so the output is
# identical under Python 2 print-statement semantics and under Python 3.
print("Dataset count %s: \n" % datasets.get_easy_dataset_category_counts())
print("Datasets count by month: \n %s" %
      datasets.get_easy_deposit_count_by_month().get_csv_content())
print("Files by months: \n %s " %
      files.get_easy_file_downloads_by_month().get_csv_content())
# The category counts are printed as returned (no .get_csv_content() call).
print("EASY categories: \n %s " % datasets.get_easy_dataset_category_counts())
Beispiel #10
0
def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics

    URL kwargs, when present, are used instead of the GET parameters as
    the filter arguments handed to the StatsMaker classes. The keys of
    each statistic are added to the template context only when that
    statistic's own query reported no error.
    """
    # kwargs override GET parameters
    stats_params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**stats_params)
    stats_dvs = StatsMakerDataverses(**stats_params)
    stats_files = StatsMakerFiles(**stats_params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # -------------------------
    # Dataverses created each month
    # -------------------------
    stats_result_dv_counts = stats_dvs.get_dataverse_counts_by_month_published()
    if not stats_result_dv_counts.has_error():
        resp_dict['dataverse_counts_by_month'] = list(
            stats_result_dv_counts.result_data['records'])
        resp_dict['dataverse_counts_by_month_sql'] = stats_result_dv_counts.sql_query

    # -------------------------
    # Dataverse counts by type
    # -------------------------
    stats_result_dv_counts_by_type = stats_dvs.get_dataverse_counts_by_type_published(
        exclude_uncategorized=True)
    if not stats_result_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = \
            stats_result_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = stats_result_dv_counts_by_type.sql_query

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(
            stats_monthly_ds_counts.result_data['records'])
        resp_dict['dataset_counts_by_month_sql'] = stats_monthly_ds_counts.sql_query

    # -------------------------
    # Datasets by subject
    # -------------------------
    stats_ds_count_by_subject = stats_datasets.get_dataset_subject_counts_published()
    # Guard on the subject query itself. The previous code re-checked the
    # monthly dataset result here, so a failed subject query was still read.
    if not stats_ds_count_by_subject.has_error():
        resp_dict['dataset_counts_by_subject'] = \
            stats_ds_count_by_subject.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(
            stats_monthly_file_counts.result_data['records'])
        resp_dict['file_counts_by_month_sql'] = stats_monthly_file_counts.sql_query

    # -------------------------
    # Files downloaded, by month
    # -------------------------
    stats_monthly_downloads = stats_files.get_file_downloads_by_month_published(
        include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(
            stats_monthly_downloads.result_data['records'])
        resp_dict['file_downloads_by_month_sql'] = stats_monthly_downloads.sql_query

    # -------------------------
    # File counts by content type
    # -------------------------
    # rp: removing this from current charts. Disabled code kept for reference:
    #
    # stats_file_content_types = stats_files.get_datafile_content_type_counts_published()
    # if not stats_file_content_types.has_error():
    #     resp_dict['file_content_types'] = list(stats_file_content_types.result_data)
    #     resp_dict['file_content_types_sql'] = stats_file_content_types.sql_query
    #     resp_dict['file_content_types_top_20'] = list(stats_file_content_types.result_data)[:20]
    #     #resp_dict['file_content_types_json'] = json.dumps(file_content_types, indent=4)

    return render(request, 'metrics/metrics_public.html', resp_dict)