Esempio n. 1
0
    def test_15_file_content_types_all(self):
        """15 - Content types of all files"""
        print(self.test_15_file_content_types_all.__doc__)

        # Build the stats maker for the date window under test.
        file_stats = StatsMakerFiles(start_date='2015-11-01',
                                     end_date='2016-03-01')
        outcome = file_stats.get_datafile_content_type_counts()
        records = outcome.result_data['records']

        # The result should hold 25 records.
        self.assertEqual(len(records), 25)

        # Expected records, paired with their index in the result list
        # (index 0 is the first listing, index 2 the third).
        expected_by_position = [
            (0, {
                'contenttype': u'application/octet-stream',
                'percent_string': '43.6%',
                'short_content_type': u'octet-stream',
                'total_count': 381,
                'type_count': 166,
            }),
            (2, {
                'contenttype': u'text/tab-separated-values',
                'percent_string': '12.9%',
                'short_content_type': u'tab-separated-values',
                'total_count': 381,
                'type_count': 49,
            }),
        ]
        for position, expected in expected_by_position:
            # Records are converted to plain dicts before comparison.
            self.assertEqual(dict(records[position]), expected)
Esempio n. 2
0
    def test_13_file_content_types_published(self):
        """13 - Content types of published files"""
        print(self.test_13_file_content_types_published.__doc__)

        # Date window passed to the stats maker.
        date_window = {'start_date': '2015-11-01', 'end_date': '2016-03-01'}
        file_stats = StatsMakerFiles(**date_window)
        outcome = file_stats.get_datafile_content_type_counts_published()
        records = outcome.result_data['records']

        # The result should hold 18 records.
        self.assertEqual(len(records), 18)

        # First record, compared as a plain dict.
        expected_first = {
            'contenttype': u'application/octet-stream',
            'percent_string': '65.1%',
            'short_content_type': u'octet-stream',
            'total_count': 255,
            'type_count': 166,
        }
        self.assertEqual(dict(records[0]), expected_first)

        # Third record, compared as returned (no dict() conversion).
        expected_third = {
            'contenttype': u'text/tab-separated-values',
            'percent_string': '9.0%',
            'short_content_type': u'tab-separated-values',
            'total_count': 255,
            'type_count': 23,
        }
        self.assertEqual(records[2], expected_third)
Esempio n. 3
0
    def test_14_file_content_types_unpublished(self):
        """14 - Content types of unpublished files"""
        # The docstring is printed as the test banner, so it must name the
        # unpublished variant that this test actually exercises.
        print(self.test_14_file_content_types_unpublished.__doc__)

        kwargs = dict(start_date='2015-11-01', end_date='2016-03-01')
        stats_maker = StatsMakerFiles(**kwargs)
        r = stats_maker.get_datafile_content_type_counts_unpublished()
        records = r.result_data['records']

        # check number of entries
        self.assertEqual(len(records), 19)

        # check first listing
        first_listing = {
            'contenttype': u'image/jpeg',
            'percent_string': '45.2%',
            'short_content_type': u'jpeg',
            'total_count': 126,
            'type_count': 57
        }
        self.assertEqual(dict(records[0]), first_listing)

        # check 3rd listing (converted with dict() like the first listing,
        # so both comparisons treat the record type the same way)
        third_listing = {
            'contenttype': u'text/plain',
            'percent_string': '10.3%',
            'short_content_type': u'plain',
            'total_count': 126,
            'type_count': 13
        }
        self.assertEqual(dict(records[2]), third_listing)
Esempio n. 4
0
    def test_14_file_content_types_unpublished(self):
        """14 - Content types of unpublished files"""
        # The printed banner comes from the docstring; it previously said
        # "published" even though the unpublished counts are tested here.
        print(self.test_14_file_content_types_unpublished.__doc__)

        stats_maker = StatsMakerFiles(start_date='2015-11-01',
                                      end_date='2016-03-01')
        r = stats_maker.get_datafile_content_type_counts_unpublished()
        records = r.result_data['records']

        # check number of entries
        self.assertEqual(len(records), 19)

        # check first listing
        first_listing = {
            'contenttype': u'image/jpeg',
            'percent_string': '45.2%',
            'short_content_type': u'jpeg',
            'total_count': 126,
            'type_count': 57,
        }
        self.assertEqual(dict(records[0]), first_listing)

        # check 3rd listing -- wrapped in dict() to match the first check
        third_listing = {
            'contenttype': u'text/plain',
            'percent_string': '10.3%',
            'short_content_type': u'plain',
            'total_count': 126,
            'type_count': 13,
        }
        self.assertEqual(dict(records[2]), third_listing)
Esempio n. 5
0
    def test_15_file_content_types_all(self):
        """15 - Content types of all files"""
        print(self.test_15_file_content_types_all.__doc__)

        # Date window handed to the stats maker.
        date_window = {'start_date': '2015-11-01', 'end_date': '2016-03-01'}
        content_type_result = StatsMakerFiles(
            **date_window).get_datafile_content_type_counts()
        records = content_type_result.result_data['records']

        # 25 records are expected in total.
        self.assertEqual(len(records), 25)

        # First record, compared as a plain dict.
        expected_first = {
            'contenttype': u'application/octet-stream',
            'percent_string': '43.6%',
            'short_content_type': u'octet-stream',
            'total_count': 381,
            'type_count': 166,
        }
        self.assertEqual(dict(records[0]), expected_first)

        # Third record, compared as a plain dict.
        expected_third = {
            'contenttype': u'text/tab-separated-values',
            'percent_string': '12.9%',
            'short_content_type': u'tab-separated-values',
            'total_count': 381,
            'type_count': 49,
        }
        self.assertEqual(dict(records[2]), expected_third)
Esempio n. 6
0
    def test_13_file_content_types_published(self):
        """13 - Content types of published files"""
        print(self.test_13_file_content_types_published.__doc__)

        # Stats maker limited to the date window under test.
        published_stats = StatsMakerFiles(start_date='2015-11-01',
                                          end_date='2016-03-01')
        outcome = published_stats.get_datafile_content_type_counts_published()
        records = outcome.result_data['records']

        # 18 records are expected in total.
        self.assertEqual(len(records), 18)

        # The first record is converted to a plain dict before comparing.
        top_entry = dict(records[0])
        self.assertEqual(top_entry, {
            'contenttype': u'application/octet-stream',
            'percent_string': '65.1%',
            'short_content_type': u'octet-stream',
            'total_count': 255,
            'type_count': 166,
        })

        # The third record is compared exactly as returned.
        third_entry = records[2]
        self.assertEqual(third_entry, {
            'contenttype': u'text/tab-separated-values',
            'percent_string': '9.0%',
            'short_content_type': u'tab-separated-values',
            'total_count': 255,
            'type_count': 23,
        })
Esempio n. 7
0
    def test_19_file_extensions_within_type(self):
        """19 - File extensions within type"""
        print(self.test_19_file_extensions_within_type.__doc__)

        outcome = StatsMakerFiles().view_file_extensions_within_type(
            file_type=FILE_TYPE_OCTET_STREAM)

        # The reported number of unique extensions should be 67 ...
        reported_total = outcome.result_data.get('number_unique_extensions')
        self.assertEqual(reported_total, 67)

        # ... and the record list should have the same length.
        extension_rows = outcome.result_data.get('records', [])
        self.assertEqual(len(extension_rows), 67)

        # Show, then verify, the 5th entry of the extension list.
        fifth_row = extension_rows[4]
        print('ext_counts', fifth_row)

        expected_fifth = OrderedDict([
            ('extension', u'.docx'),
            ('count', 15),
            ('total_count', 437),
            ('percent_string', '3.432%'),
        ])
        self.assertEqual(expected_fifth, fifth_row)
Esempio n. 8
0
    def get_basic_stats():
        """Build the summary stats for files with an unknown content type.

        Runs ``view_file_extensions_within_type`` for
        ``FILE_TYPE_OCTET_STREAM`` and wraps two values from its result data
        (``total_file_count`` and ``percent_unknown``) in ``NamedStat``
        objects.

        Returns:
            dict with the keys ``cnt_no_content_type`` and
            ``percent_no_content_type``, each mapped to a ``NamedStat``.

        Raises:
            ValueError: if the stats call returns a falsy result or the
                result carries no ``result_data``.
        """
        stats_files = StatsMakerFiles()
        stats_result = stats_files.view_file_extensions_within_type(
            FILE_TYPE_OCTET_STREAM)
        if not (stats_result and stats_result.result_data):
            raise ValueError('ContentTypeStats not calculated for content types')

        # Only the values that are actually reported are read; missing keys
        # yield None, as with the original .get() lookups.
        result_data = stats_result.result_data
        total_file_count = result_data.get('total_file_count')
        percent_unknown = result_data.get('percent_unknown')

        # NOTE(review): the last NamedStat argument looks like the name of
        # the related detail view -- confirm against NamedStat's definition.
        detail_view = 'view_files_extensions_with_unknown_content_types'

        file_stats = dict(
            cnt_no_content_type=NamedStat(
                'No Content Type',
                total_file_count,
                'Counts of FileMetadata objects with an unknown content type',
                detail_view),
            percent_no_content_type=NamedStat(
                '% No Content Type',
                percent_unknown,
                'Percent of FileMetadata objects with an unknown content type',
                detail_view),
        )

        return file_stats
Esempio n. 9
0
    def test_11_file_downloads_by_month_unpublished(self):
        """11 - File downloads by month: unpublished,"""
        print(self.test_11_file_downloads_by_month_unpublished.__doc__)

        # Stats maker limited to the date window under test.
        downloads_stats = StatsMakerFiles(start_date='2015-02-01',
                                          end_date='2015-11-01')
        outcome = downloads_stats.get_file_downloads_by_month_unpublished()
        monthly_records = outcome.result_data['records']

        # No months should be reported ...
        self.assertEqual(len(monthly_records), 0)

        # ... because it is very rare to have downloaded "unpublished" files,
        # so the record list itself should be empty.
        self.assertEqual(monthly_records, [])
Esempio n. 10
0
    def test_11_file_downloads_by_month_unpublished(self):
        """11 - File downloads by month: unpublished,"""
        print(self.test_11_file_downloads_by_month_unpublished.__doc__)

        # Date window handed to the stats maker.
        date_window = {'start_date': '2015-02-01', 'end_date': '2015-11-01'}
        outcome = StatsMakerFiles(
            **date_window).get_file_downloads_by_month_unpublished()
        records = outcome.result_data['records']

        # Zero months expected.
        self.assertEqual(len(records), 0)

        # Downloads of "unpublished" files are very rare: expect no data.
        self.assertEqual(records, [])
Esempio n. 11
0
def get_total_published_counts():
    """Collect the headline totals for published content.

    Returns a dict with the published dataverse, dataset and file counts
    plus the total file download count; the download count is requested
    with ``include_pre_dv4_downloads=True``.
    """
    dataverse_maker = StatsMakerDataverses()
    dataset_maker = StatsMakerDatasets()
    file_maker = StatsMakerFiles()

    def count_of(stats_result):
        # Every stats call used here exposes its total under 'count'.
        return stats_result.result_data['count']

    return {
        'total_dataverses': count_of(
            dataverse_maker.get_dataverse_count_published()),
        'total_datasets': count_of(
            dataset_maker.get_dataset_count_published()),
        'total_files': count_of(
            file_maker.get_datafile_count_published()),
        'total_downloads': count_of(
            file_maker.get_total_file_downloads(
                include_pre_dv4_downloads=True)),
    }
Esempio n. 12
0
def view_files_by_type(request):
    """Render the published file content-type counts.

    The request's GET parameters are forwarded to ``StatsMakerFiles``.  When
    the stats call reports success, the counts are placed in the template
    context both as a list and as an indented JSON string.
    """
    query_params = request.GET.dict()
    file_stats = StatsMakerFiles(**query_params)

    # Template context, kept in insertion order.
    context = OrderedDict()

    # -------------------------
    # Published file counts by content type
    # -------------------------
    # NOTE(review): this unpacks a (success, data) pair; other callers in
    # this project read `.result_data` from the return value -- confirm
    # which return shape this revision of StatsMakerFiles uses.
    outcome = file_stats.get_datafile_content_type_counts_published()
    success, file_content_types = outcome
    if success:
        context['file_content_types'] = list(file_content_types)
        context['file_content_types_json'] = json.dumps(
            file_content_types, indent=4)

    return render(
        request, 'metrics/visualizations/file_content_types.html', context)
Esempio n. 13
0
    def test_09_file_total_counts(self):
        """09 - Count total files: published, unpublished, all"""
        print(self.test_09_file_total_counts.__doc__)

        file_stats = StatsMakerFiles()

        # Published files
        published = file_stats.get_datafile_count_published()
        self.assertEqual(published.result_data['count'], 1014)

        # Unpublished files
        unpublished = file_stats.get_datafile_count_unpublished()
        self.assertEqual(unpublished.result_data['count'], 570)

        # All files
        everything = file_stats.get_datafile_count()
        self.assertEqual(everything.result_data['count'], 1584)
Esempio n. 14
0
def view_all_file_extension_counts(request):
    """Render the reference table of all file extensions with counts.

    Falls back to an empty table with zero totals when the stats call
    yields nothing usable.
    """
    template_name = 'metrics/view_all_file_extension_counts.html'
    outcome = StatsMakerFiles().view_file_extensions_within_type()

    # Guard clause: no result, or a result without data.
    if not (outcome and outcome.result_data):
        empty_context = {
            'all_counts': [],
            'total_file_count': 0,
            'number_unique_extensions': 0,
        }
        return render(request, template_name, empty_context)

    data = outcome.result_data
    context = {
        'all_counts': data['records'],
        'total_file_count': data['total_file_count'],
        'number_unique_extensions': data['number_unique_extensions'],
    }
    return render(request, template_name, context)
Esempio n. 15
0
    def test_09_file_total_counts(self):
        """09 - Count total files: published, unpublished, all"""
        print(self.test_09_file_total_counts.__doc__)

        counter = StatsMakerFiles()

        # Published file count
        self.assertEqual(
            counter.get_datafile_count_published().result_data['count'],
            1014)

        # Unpublished file count
        self.assertEqual(
            counter.get_datafile_count_unpublished().result_data['count'],
            570)

        # Count across all files
        self.assertEqual(
            counter.get_datafile_count().result_data['count'],
            1584)
Esempio n. 16
0
def view_files_extensions_with_unknown_content_types(request):
    """Render the reference table of extensions with unknown content type.

    The table is built from the extension counts for
    ``FILE_TYPE_OCTET_STREAM``; when no data is available every value in
    the template context is empty or zero.
    """
    template_name = (
        'metrics/view_file_extensions_with_unknown_content_types.html')
    outcome = StatsMakerFiles().view_file_extensions_within_type(
        FILE_TYPE_OCTET_STREAM)

    # Guard clause: no result, or a result without data.
    if not (outcome and outcome.result_data):
        return render(request, template_name, {
            'unknown_counts': [],
            'total_file_count': 0,
            'number_unique_extensions': 0,
            'all_dv_files_count': 0,
            'percent_unknown': 0,
        })

    data = outcome.result_data
    return render(request, template_name, {
        'unknown_counts': data['records'],
        'total_file_count': data['total_file_count'],
        'number_unique_extensions': data['number_unique_extensions'],
        'all_dv_files_count': data['all_dv_files'],
        'percent_unknown': data['percent_unknown'],
    })
Esempio n. 17
0
    def test_12_file_downloads_by_month_all(self):
        """12 - File downloads by month: all"""
        print(self.test_12_file_downloads_by_month_all.__doc__)

        # Stats maker restricted to the year 2015.
        yearly_stats = StatsMakerFiles(selected_year=2015)
        outcome = yearly_stats.get_file_downloads_by_month()
        monthly_records = outcome.result_data['records']

        # Nine months of data are expected.
        self.assertEqual(len(monthly_records), 9)

        # The final record, as a plain dict, should be December 2015.
        expected_final = {
            'count': 31,
            'month_name': 'December',
            'month_name_short': 'Dec',
            'month_num': 12,
            'running_total': 465,
            'year_num': 2015,
            'yyyy_mm': '2015-12',
        }
        self.assertEqual(dict(monthly_records[-1]), expected_final)
Esempio n. 18
0
def view_all_file_extension_counts(request):
    """Show every file extension together with its count.

    The template context starts out empty (no rows, zero totals) and is
    replaced with real values only when the stats call returns data.
    """
    extension_stats = StatsMakerFiles().view_file_extensions_within_type()

    # Defaults used when there is nothing to report.
    context = {
        'all_counts': [],
        'total_file_count': 0,
        'number_unique_extensions': 0,
    }

    if extension_stats and extension_stats.result_data:
        payload = extension_stats.result_data
        context = {
            'all_counts': payload['records'],
            'total_file_count': payload['total_file_count'],
            'number_unique_extensions': payload['number_unique_extensions'],
        }

    return render(
        request, 'metrics/view_all_file_extension_counts.html', context)
Esempio n. 19
0
    def test_12_file_downloads_by_month_all(self):
        """12 - File downloads by month: all"""
        print(self.test_12_file_downloads_by_month_all.__doc__)

        # Only the year is selected for this run.
        maker_options = {'selected_year': 2015}
        outcome = StatsMakerFiles(
            **maker_options).get_file_downloads_by_month()
        records = outcome.result_data['records']

        # Expect 9 monthly records.
        self.assertEqual(len(records), 9)

        # The last record is converted to a plain dict and compared
        # against the expected December 2015 values.
        final_month = dict(records[-1])
        self.assertEqual(final_month, {
            'count': 31,
            'month_name': 'December',
            'month_name_short': 'Dec',
            'month_num': 12,
            'running_total': 465,
            'year_num': 2015,
            'yyyy_mm': '2015-12',
        })
Esempio n. 20
0
    def test_10_file_downloads_by_month_published(self):
        """10 - File downloads by month: published,"""
        print(self.test_10_file_downloads_by_month_published.__doc__)

        # Stats maker limited to the date window under test.
        published_stats = StatsMakerFiles(start_date='2015-05-30',
                                          end_date='2015-10-01')
        outcome = published_stats.get_file_downloads_by_month_published()
        monthly_records = outcome.result_data['records']

        # Five months of data are expected.
        self.assertEqual(len(monthly_records), 5)

        # The final record, as a plain dict, should be September 2015.
        expected_final = {
            'count': 7,
            'month_name': 'September',
            'month_name_short': 'Sep',
            'month_num': 9,
            'running_total': 309,
            'year_num': 2015,
            'yyyy_mm': '2015-09',
        }
        self.assertEqual(dict(monthly_records[-1]), expected_final)
Esempio n. 21
0
def view_files_extensions_with_unknown_content_types(request):
    """Show the extensions of files whose content type is unknown.

    Counts come from ``view_file_extensions_within_type`` called with
    ``FILE_TYPE_OCTET_STREAM``.  The context defaults to empty/zero values
    and is replaced only when the stats call returns data.
    """
    extension_stats = StatsMakerFiles().view_file_extensions_within_type(
        FILE_TYPE_OCTET_STREAM)

    # Defaults used when there is nothing to report.
    context = {
        'unknown_counts': [],
        'total_file_count': 0,
        'number_unique_extensions': 0,
        'all_dv_files_count': 0,
        'percent_unknown': 0,
    }

    if extension_stats and extension_stats.result_data:
        payload = extension_stats.result_data
        context = {
            'unknown_counts': payload['records'],
            'total_file_count': payload['total_file_count'],
            'number_unique_extensions': payload['number_unique_extensions'],
            'all_dv_files_count': payload['all_dv_files'],
            'percent_unknown': payload['percent_unknown'],
        }

    template_name = (
        'metrics/view_file_extensions_with_unknown_content_types.html')
    return render(request, template_name, context)
Esempio n. 22
0
    def test_19_file_extensions_within_type(self):
        """19 - File extensions within type"""
        print(self.test_19_file_extensions_within_type.__doc__)

        extension_stats = StatsMakerFiles()
        outcome = extension_stats.view_file_extensions_within_type(
            file_type=FILE_TYPE_OCTET_STREAM)

        # Number of unique extensions reported by the stats call.
        unique_total = outcome.result_data.get('number_unique_extensions')
        self.assertEqual(unique_total, 67)

        # The record list must be as long as the reported number.
        rows = outcome.result_data.get('records', [])
        self.assertEqual(len(rows), 67)

        # Print and verify the 5th listing in the extension count list.
        print('ext_counts', rows[4])
        expected_row = OrderedDict([
            ('extension', u'.docx'),
            ('count', 15),
            ('total_count', 437),
            ('percent_string', '3.432%'),
        ])

        self.assertEqual(expected_row, rows[4])
Esempio n. 23
0
    def get_basic_stats():
        """Return NamedStat entries describing files without a content type.

        The extension counts for ``FILE_TYPE_OCTET_STREAM`` are fetched and
        the ``total_file_count`` and ``percent_unknown`` values of the result
        data are each wrapped in a ``NamedStat``.

        Returns:
            dict mapping ``cnt_no_content_type`` and
            ``percent_no_content_type`` to their ``NamedStat`` objects.

        Raises:
            ValueError: if the stats call gives back a falsy result or a
                result without ``result_data``.
        """
        stats_result = StatsMakerFiles().view_file_extensions_within_type(
            FILE_TYPE_OCTET_STREAM)
        if not (stats_result and stats_result.result_data):
            raise ValueError(
                'ContentTypeStats not calculated for content types')

        # Read just the two values that end up in the returned stats;
        # .get() keeps the original behavior of yielding None when absent.
        total_file_count = stats_result.result_data.get('total_file_count')
        percent_unknown = stats_result.result_data.get('percent_unknown')

        # NOTE(review): presumably the name of the view showing the details
        # behind both stats -- verify against NamedStat.
        detail_view = 'view_files_extensions_with_unknown_content_types'

        return dict(
            cnt_no_content_type=NamedStat(
                'No Content Type',
                total_file_count,
                'Counts of FileMetadata objects with an unknown content type',
                detail_view),
            percent_no_content_type=NamedStat(
                '% No Content Type',
                percent_unknown,
                'Percent of FileMetadata objects with an unknown content type',
                detail_view),
        )
Esempio n. 24
0
    def test_10_file_downloads_by_month_published(self):
        """10 - File downloads by month: published,"""
        print(self.test_10_file_downloads_by_month_published.__doc__)

        # Date window handed to the stats maker.
        date_window = {'start_date': '2015-05-30', 'end_date': '2015-10-01'}
        outcome = StatsMakerFiles(
            **date_window).get_file_downloads_by_month_published()
        records = outcome.result_data['records']

        # Expect 5 monthly records.
        self.assertEqual(len(records), 5)

        # The last record is converted to a plain dict and compared
        # against the expected September 2015 values.
        final_month = dict(records[-1])
        self.assertEqual(final_month, {
            'count': 7,
            'month_name': 'September',
            'month_name_short': 'Sep',
            'month_num': 9,
            'running_total': 309,
            'year_num': 2015,
            'yyyy_mm': '2015-09',
        })
Esempio n. 25
0
def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics.

    The stats makers are configured from ``kwargs`` when any are given,
    otherwise from the request's GET parameters.  Each section below runs
    one stats call and adds its data to the template context only when that
    call reports no error.

    :param request: the incoming HTTP request
    :param kwargs: optional stats-maker parameters; they override the GET
        parameters
    :return: the rendered ``metrics/metrics_public.html`` response
    """
    # kwargs override GET parameters
    stats_params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**stats_params)
    stats_dvs = StatsMakerDataverses(**stats_params)
    stats_files = StatsMakerFiles(**stats_params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # -------------------------
    # Dataverses created each month
    # -------------------------
    stats_result_dv_counts = \
        stats_dvs.get_dataverse_counts_by_month_published()
    if not stats_result_dv_counts.has_error():
        resp_dict['dataverse_counts_by_month'] = list(
            stats_result_dv_counts.result_data['records'])
        resp_dict['dataverse_counts_by_month_sql'] = \
            stats_result_dv_counts.sql_query

    # -------------------------
    # Dataverse counts by type
    # -------------------------
    stats_result_dv_counts_by_type = \
        stats_dvs.get_dataverse_counts_by_type_published(
            exclude_uncategorized=True)
    if not stats_result_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = \
            stats_result_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = \
            stats_result_dv_counts_by_type.sql_query

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = \
        stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(
            stats_monthly_ds_counts.result_data['records'])
        resp_dict['dataset_counts_by_month_sql'] = \
            stats_monthly_ds_counts.sql_query

    # -------------------------
    # Dataset counts by subject
    # -------------------------
    stats_ds_count_by_subject = \
        stats_datasets.get_dataset_subject_counts_published()
    # Check the subject result itself; the monthly dataset result was
    # previously checked here by mistake.
    if not stats_ds_count_by_subject.has_error():
        resp_dict['dataset_counts_by_subject'] = \
            stats_ds_count_by_subject.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(
            stats_monthly_file_counts.result_data['records'])
        resp_dict['file_counts_by_month_sql'] = \
            stats_monthly_file_counts.sql_query

    # -------------------------
    # Files downloaded, by month
    # -------------------------
    stats_monthly_downloads = \
        stats_files.get_file_downloads_by_month_published(
            include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(
            stats_monthly_downloads.result_data['records'])
        resp_dict['file_downloads_by_month_sql'] = \
            stats_monthly_downloads.sql_query

    # -------------------------
    # File counts by content type
    # -------------------------
    # rp: removing this from current charts
    # (get_datafile_content_type_counts_published is intentionally not
    # called here.)

    return render(request, 'metrics/metrics_public.html', resp_dict)
Esempio n. 26
0
    sys.path.append(proj_path)
    #sys.path.append(dirname(proj_path))
    django.setup()
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "miniverse.settings.local")
    EASY_STATISTICS = 'true'

import random
import numpy as np
import pandas as pd
from django.db.models import F

from dv_apps.datasets.models import *
from dv_apps.datafiles.models import FileMetadata
from dv_apps.metrics.stats_util_base import StatsMakerBase
from dv_apps.metrics.stats_view_base import StatsViewSwagger
from dv_apps.metrics.stats_util_datasets import StatsMakerDatasets
from dv_apps.metrics.stats_util_files import StatsMakerFiles
# get_easy_dataset_category_counts

# Instantiate the stats makers used by this ad-hoc report script.
miniset = StatsMakerBase()
datasets = StatsMakerDatasets()
files = StatsMakerFiles()

# Each print receives a single pre-formatted string, so the parenthesized
# form gives the same output under Python 2 and Python 3 (the bare print
# statement is a SyntaxError on Python 3).
print("Dataset count %s: \n" % datasets.get_easy_dataset_category_counts())
print("Datasets count by month: \n %s"
      % datasets.get_easy_deposit_count_by_month().get_csv_content())
print("Files by months: \n %s "
      % files.get_easy_file_downloads_by_month().get_csv_content())
# The category counts are printed as returned, without get_csv_content().
print("EASY categories: \n %s "
      % datasets.get_easy_dataset_category_counts())
Esempio n. 27
0
def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics

    Dispatches on the EASY_STATISTICS flag: when it is truthy the EASY
    metrics page (or its Excel export) is produced, otherwise the
    Dataverse metrics page is rendered.

    :param request: the incoming HTTP request object
    :param kwargs: optional parameters forwarded to the StatsMaker* classes
    :return: whatever ``render`` or ``get_easy_excel_sheets`` returns
    """
    if EASY_STATISTICS:
        return _view_easy_visualizations(request, kwargs)
    return _view_dataverse_visualizations(request, kwargs)


def _store_monthly_counts(resp_dict, stats_result, records_key, max_key,
                          noncumulative):
    """Store monthly records plus, for noncumulative charts, their max count."""
    if stats_result.has_error():
        return

    records = list(stats_result.result_data['records'])
    resp_dict[records_key] = records
    if noncumulative and records:
        # The maximum is stored as a string, as the original code did.
        resp_dict[max_key] = str(max(item['count'] for item in records))
    else:
        resp_dict[max_key] = None


def _store_records_and_sql(resp_dict, stats_result, records_key, sql_key):
    """Store the records (as a list) and the SQL query of a stats result."""
    if stats_result.has_error():
        return

    resp_dict[records_key] = list(stats_result.result_data['records'])
    resp_dict[sql_key] = stats_result.sql_query


def _store_category_counts(resp_dict, stats_datasets):
    """Store the capitalized category name and the dataset counts per category."""
    stats_by_category = stats_datasets.get_dataset_category_counts_published()
    if stats_by_category.has_error():
        return

    resp_dict['category'] = stats_datasets.get_category().capitalize()
    resp_dict['dataset_counts_by_category'] = stats_by_category.result_data['records']


def _view_easy_visualizations(request, kwargs):
    """Build the EASY metrics page, or the Excel export when the form asks for it."""
    if request.method == "POST":
        form = Metrics(request.POST)
    else:
        form = Metrics()

    # Form values (or these defaults) always replace caller-supplied kwargs
    # of the same name, so kwargs is never empty past this point.
    form_defaults = (
        ("category", "audience"),
        ("start_date", "2008-01-01"),
        ("end_date", "2017-12-31"),
        ("cumulative", "cumulative_period"),
        ("downloads", "files"),
        ("date_type", "publish"),
        ("bulk_import_included", "bulk_included"),
    )
    for field_name, default_value in form_defaults:
        kwargs[field_name] = form.data.get(field_name, default_value)
    noncumulative = kwargs["cumulative"] == 'noncumulative'

    if form.data.get('excel'):
        parameters = OrderedDict([
            ('start date', kwargs["start_date"]),
            ('end date', kwargs["end_date"]),
            ('date type', kwargs["date_type"]),
            ('cumulative', kwargs["cumulative"]),
            ('download type', kwargs["downloads"]),
            ('bulk import', kwargs["bulk_import_included"]),
        ])
        # NOTE(review): `respdict` is not defined in this function --
        # presumably a module-level dict. The export reads whatever an
        # earlier page render stored in it, not data computed from the
        # parameters above; confirm this is intended.
        graphs = [
            {'data': respdict['dataset_counts_by_month'], 'name': 'Dataset counts'},
            {'data': respdict['file_counts_by_month'], 'name': 'File counts'},
            {'data': respdict['file_downloads_by_month'], 'name': 'Download counts'},
        ]
        return get_easy_excel_sheets(parameters, graphs, 'Easy metrics.xlsx')

    stats_datasets = StatsMakerDatasets(**kwargs)
    stats_files = StatsMakerFiles(**kwargs)

    # Alias, not a copy: every write below mutates the `respdict` object
    # that the Excel branch above reads from.
    resp_dict = respdict

    # Datasets created each month
    _store_monthly_counts(
        resp_dict,
        stats_datasets.get_dataset_counts_by_create_date_published(),
        'dataset_counts_by_month', 'max_count_datasets', noncumulative)

    # Datasets by category
    _store_category_counts(resp_dict, stats_datasets)

    # Files created, by month
    _store_monthly_counts(
        resp_dict,
        stats_files.get_file_count_by_month_published(),
        'file_counts_by_month', 'max_count_files', noncumulative)

    # Datasets (or just one file of it) downloaded, by month
    _store_monthly_counts(
        resp_dict,
        stats_files.get_file_downloads_by_month_published(include_pre_dv4_downloads=True),
        'file_downloads_by_month', 'max_count_downloads', noncumulative)

    resp_dict['form'] = form
    return render(request, 'metrics/metrics_easy.html', resp_dict)


def _view_dataverse_visualizations(request, kwargs):
    """Build the Dataverse metrics page (dataverses, datasets, files, downloads)."""
    # kwargs override GET parameters
    stats_params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**stats_params)
    stats_dvs = StatsMakerDataverses(**stats_params)
    stats_files = StatsMakerFiles(**stats_params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # Dataverses created each month
    _store_records_and_sql(
        resp_dict,
        stats_dvs.get_dataverse_counts_by_month_published(),
        'dataverse_counts_by_month', 'dataverse_counts_by_month_sql')

    # Dataverse counts by type (records stored as returned, under a
    # differently named SQL key, hence not routed through the helper)
    stats_dv_counts_by_type = stats_dvs.get_dataverse_counts_by_type_published(
        exclude_uncategorized=True)
    if not stats_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = stats_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = stats_dv_counts_by_type.sql_query

    # Datasets created each month
    _store_records_and_sql(
        resp_dict,
        stats_datasets.get_dataset_counts_by_create_date_published(),
        'dataset_counts_by_month', 'dataset_counts_by_month_sql')

    # Datasets by category
    _store_category_counts(resp_dict, stats_datasets)

    # Files created, by month
    _store_records_and_sql(
        resp_dict,
        stats_files.get_file_count_by_month_published(),
        'file_counts_by_month', 'file_counts_by_month_sql')

    # Files downloaded, by month
    _store_records_and_sql(
        resp_dict,
        stats_files.get_file_downloads_by_month_published(include_pre_dv4_downloads=True),
        'file_downloads_by_month', 'file_downloads_by_month_sql')

    # File counts by content type are not part of the current charts.

    return render(request, 'metrics/metrics_public.html', resp_dict)