def test_08_dataset_counts_all(self):
    """08 - Test all dataset counts by month"""
    # The docstring doubles as the progress line printed for this test.
    print(self.test_08_dataset_counts_all.__doc__)

    # Fetch the by-create-date counts from a default-configured stats maker.
    result = StatsMakerDatasets().get_dataset_counts_by_create_date()
    monthly_records = result.result_data['records']

    # Expect exactly 16 monthly records.
    self.assertEqual(len(monthly_records), 16)

    first_month = {
        'count': 36,
        'month_name': 'April',
        'month_name_short': 'Apr',
        'month_num': 4,
        'running_total': 36,
        'year_num': 2015,
        'yyyy_mm': '2015-04',
    }
    last_month = {
        'count': 98,
        'month_name': 'July',
        'month_name_short': 'Jul',
        'month_num': 7,
        'running_total': 570,
        'year_num': 2016,
        'yyyy_mm': '2016-07',
    }

    # Compare both ends of the series, first record before last record.
    for position, expected in ((0, first_month), (-1, last_month)):
        self.assertEqual(dict(monthly_records[position]), expected)
def test_08_dataset_counts_all(self):
    """08 - Test all dataset counts by month"""
    # Print the docstring so the test run shows which case is executing.
    print(self.test_08_dataset_counts_all.__doc__)

    maker = StatsMakerDatasets()
    monthly = maker.get_dataset_counts_by_create_date().result_data['records']

    # The series must contain 16 months.
    self.assertEqual(len(monthly), 16)

    # Earliest record: count and running total are both 36.
    expected_first = {
        'count': 36,
        'month_name': 'April',
        'month_name_short': 'Apr',
        'month_num': 4,
        'running_total': 36,
        'year_num': 2015,
        'yyyy_mm': '2015-04',
    }
    self.assertEqual(dict(monthly[0]), expected_first)

    # Latest record: the running total has reached 570.
    expected_last = {
        'count': 98,
        'month_name': 'July',
        'month_name_short': 'Jul',
        'month_num': 7,
        'running_total': 570,
        'year_num': 2016,
        'yyyy_mm': '2016-07',
    }
    self.assertEqual(dict(monthly[-1]), expected_last)
def get_stats_result(self, request):
    """Build the dataset-count result matching the requested publication state.

    The stats maker is configured from the request's GET parameters. The
    publication state reported by ``self.get_pub_state(request)`` selects
    between the "all", "unpublished" and (by default) "published" counts.
    """
    maker = StatsMakerDatasets(**request.GET.dict())
    state = self.get_pub_state(request)

    if state == self.PUB_STATE_ALL:
        return maker.get_dataset_count()
    if state == self.PUB_STATE_UNPUBLISHED:
        return maker.get_dataset_count_unpublished()

    # Any other state falls back to published datasets only.
    return maker.get_dataset_count_published()
def get_stats_result(self, request):
    """Build the dataset-subject-count result for the requested publication state.

    The stats maker is configured from the request's GET parameters. The
    value of ``self.get_pub_state(request)`` decides which of the three
    subject-count queries runs; "published" is the fallback.
    """
    maker = StatsMakerDatasets(**request.GET.dict())
    state = self.get_pub_state(request)

    # Select the query first, then run it exactly once.
    if state == self.PUB_STATE_ALL:
        fetch_counts = maker.get_dataset_subject_counts
    elif state == self.PUB_STATE_UNPUBLISHED:
        fetch_counts = maker.get_dataset_subject_counts_unpublished
    else:
        fetch_counts = maker.get_dataset_subject_counts_published

    return fetch_counts()
def get_total_published_counts():
    """Collect headline totals for published dataverses, datasets and files.

    Returns a dict with the keys ``total_dataverses``, ``total_datasets``,
    ``total_files`` and ``total_downloads``. Each value is the ``count``
    entry of the corresponding stats result. The download total is requested
    with ``include_pre_dv4_downloads=True``.
    """
    dataverse_stats = StatsMakerDataverses()
    dataset_stats = StatsMakerDatasets()
    file_stats = StatsMakerFiles()

    # Run the queries in the same order as the keys of the returned dict.
    dataverse_total = dataverse_stats.get_dataverse_count_published()
    dataset_total = dataset_stats.get_dataset_count_published()
    file_total = file_stats.get_datafile_count_published()
    download_total = file_stats.get_total_file_downloads(
        include_pre_dv4_downloads=True)

    return {
        'total_dataverses': dataverse_total.result_data['count'],
        'total_datasets': dataset_total.result_data['count'],
        'total_files': file_total.result_data['count'],
        'total_downloads': download_total.result_data['count'],
    }
def test_05_dataset_total_counts(self):
    """05 - Count total datasets: published, unpublished, all"""
    # The docstring doubles as the progress line printed for this test.
    print(self.test_05_dataset_total_counts.__doc__)

    # All three counts share one maker configured with the same start date.
    maker = StatsMakerDatasets(start_date='2016-01-01')

    published = maker.get_dataset_count_published()
    self.assertEqual(published.result_data['count'], 85)

    unpublished = maker.get_dataset_count_unpublished()
    self.assertEqual(unpublished.result_data['count'], 198)

    # 85 published + 198 unpublished = 283 in total.
    everything = maker.get_dataset_count()
    self.assertEqual(everything.result_data['count'], 283)
def test_05_dataset_total_counts(self):
    """05 - Count total datasets: published, unpublished, all"""
    # Print the docstring so the test run shows which case is executing.
    print(self.test_05_dataset_total_counts.__doc__)

    maker = StatsMakerDatasets(**{'start_date': '2016-01-01'})

    # (query, expected count) pairs, checked in order:
    # published, unpublished, then all datasets.
    expectations = (
        (maker.get_dataset_count_published, 85),
        (maker.get_dataset_count_unpublished, 198),
        (maker.get_dataset_count, 283),
    )
    for run_query, expected_count in expectations:
        self.assertEqual(run_query().result_data['count'], expected_count)
def _max_count_or_none(records, noncumulative):
    """Return the largest ``count`` in *records* as a string, or None.

    None is returned when *noncumulative* is false or *records* is empty.
    """
    if noncumulative and records:
        return str(max(item['count'] for item in records))
    return None


def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics

    Two modes, selected by the module-level ``EASY_STATISTICS`` flag:

    * EASY: filter values are read from the ``Metrics`` form (bound to POST
      data when the request is a POST) and written into ``kwargs``. When the
      form data contains ``excel`` an Excel workbook is returned; otherwise
      ``metrics/metrics_easy.html`` is rendered.
    * Dataverse: stats makers are configured from ``kwargs`` if any were
      given, otherwise from the GET parameters, and
      ``metrics/metrics_public.html`` is rendered.

    A series is added to the template context only when its query reports
    no error.
    """
    if EASY_STATISTICS:
        if request.method == "POST":
            form = Metrics(request.POST)
        else:
            form = Metrics()

        # Form values (or these defaults) always replace caller-supplied
        # kwargs of the same name.
        form_defaults = (
            ("category", "audience"),
            ("start_date", "2008-01-01"),
            ("end_date", "2017-12-31"),
            ("cumulative", "cumulative_period"),
            ("downloads", "files"),
            ("date_type", "publish"),
            ("bulk_import_included", "bulk_included"),
        )
        for field_name, default in form_defaults:
            kwargs[field_name] = form.data.get(field_name, default)

        noncumulative = kwargs["cumulative"] == 'noncumulative'

        # NOTE(review): `respdict` is not defined inside this function;
        # presumably it is a module-level dict shared between requests, which
        # would be how the Excel export can read series stored by an earlier
        # page render. Confirm -- if so, results can leak between users.
        if form.data.get('excel'):
            parameters = OrderedDict()
            parameters['start date'] = kwargs["start_date"]
            parameters['end date'] = kwargs["end_date"]
            parameters['date type'] = kwargs["date_type"]
            parameters['cumulative'] = kwargs["cumulative"]
            parameters['download type'] = kwargs["downloads"]
            parameters['bulk import'] = kwargs["bulk_import_included"]

            graphs = [
                {'data': respdict['dataset_counts_by_month'],
                 'name': 'Dataset counts'},
                {'data': respdict['file_counts_by_month'],
                 'name': 'File counts'},
                {'data': respdict['file_downloads_by_month'],
                 'name': 'Download counts'},
            ]
            return get_easy_excel_sheets(parameters, graphs,
                                         'Easy metrics.xlsx')

        # kwargs was populated from the form above, so it is never empty
        # here; the former request.GET fallback could not be reached.
        stats_datasets = StatsMakerDatasets(**kwargs)
        stats_files = StatsMakerFiles(**kwargs)

        resp_dict = respdict

        # -------------------------
        # Datasets created each month
        # -------------------------
        stats_monthly_ds_counts = \
            stats_datasets.get_dataset_counts_by_create_date_published()
        if not stats_monthly_ds_counts.has_error():
            monthly_datasets = list(
                stats_monthly_ds_counts.result_data['records'])
            resp_dict['dataset_counts_by_month'] = monthly_datasets
            resp_dict['max_count_datasets'] = _max_count_or_none(
                monthly_datasets, noncumulative)

        # --------------------
        # Datasets by category
        # --------------------
        stats_ds_count_by_category = \
            stats_datasets.get_dataset_category_counts_published()
        if not stats_ds_count_by_category.has_error():
            resp_dict['category'] = stats_datasets.get_category().capitalize()
            resp_dict['dataset_counts_by_category'] = \
                stats_ds_count_by_category.result_data['records']

        # -------------------------
        # Files created, by month
        # -------------------------
        stats_monthly_file_counts = \
            stats_files.get_file_count_by_month_published()
        if not stats_monthly_file_counts.has_error():
            monthly_files = list(
                stats_monthly_file_counts.result_data['records'])
            resp_dict['file_counts_by_month'] = monthly_files
            resp_dict['max_count_files'] = _max_count_or_none(
                monthly_files, noncumulative)

        # ------------------------------------------------------
        # Datasets (or just one file of it) downloaded, by month
        # ------------------------------------------------------
        stats_monthly_downloads = \
            stats_files.get_file_downloads_by_month_published(
                include_pre_dv4_downloads=True)
        if not stats_monthly_downloads.has_error():
            monthly_downloads = list(
                stats_monthly_downloads.result_data['records'])
            resp_dict['file_downloads_by_month'] = monthly_downloads
            resp_dict['max_count_downloads'] = _max_count_or_none(
                monthly_downloads, noncumulative)

        resp_dict['form'] = form
        return render(request, 'metrics/metrics_easy.html', resp_dict)

    # DATAVERSE
    # kwargs override GET parameters
    params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**params)
    stats_dvs = StatsMakerDataverses(**params)
    stats_files = StatsMakerFiles(**params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # -------------------------
    # Dataverses created each month
    # -------------------------
    stats_result_dv_counts = \
        stats_dvs.get_dataverse_counts_by_month_published()
    if not stats_result_dv_counts.has_error():
        resp_dict['dataverse_counts_by_month'] = list(
            stats_result_dv_counts.result_data['records'])
        resp_dict['dataverse_counts_by_month_sql'] = \
            stats_result_dv_counts.sql_query

    # -------------------------
    # Dataverse counts by type
    # -------------------------
    stats_result_dv_counts_by_type = \
        stats_dvs.get_dataverse_counts_by_type_published(
            exclude_uncategorized=True)
    if not stats_result_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = \
            stats_result_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = \
            stats_result_dv_counts_by_type.sql_query

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = \
        stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(
            stats_monthly_ds_counts.result_data['records'])
        resp_dict['dataset_counts_by_month_sql'] = \
            stats_monthly_ds_counts.sql_query

    # -------------------------
    # Datasets by category
    # -------------------------
    stats_ds_count_by_category = \
        stats_datasets.get_dataset_category_counts_published()
    if not stats_ds_count_by_category.has_error():
        resp_dict['category'] = stats_datasets.get_category().capitalize()
        resp_dict['dataset_counts_by_category'] = \
            stats_ds_count_by_category.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = \
        stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(
            stats_monthly_file_counts.result_data['records'])
        resp_dict['file_counts_by_month_sql'] = \
            stats_monthly_file_counts.sql_query

    # -------------------------
    # Files downloaded, by month
    # -------------------------
    stats_monthly_downloads = \
        stats_files.get_file_downloads_by_month_published(
            include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(
            stats_monthly_downloads.result_data['records'])
        resp_dict['file_downloads_by_month_sql'] = \
            stats_monthly_downloads.sql_query

    # rp: the "file counts by content type" chart has been removed from the
    # current charts, so no content-type series is added to the context.

    return render(request, 'metrics/metrics_public.html', resp_dict)
sys.path.append(proj_path) #sys.path.append(dirname(proj_path)) django.setup() os.environ.setdefault("DJANGO_SETTINGS_MODULE", "miniverse.settings.local") EASY_STATISTICS = 'true' import random import numpy as np import pandas as pd from django.db.models import F from dv_apps.datasets.models import * from dv_apps.datafiles.models import FileMetadata from dv_apps.metrics.stats_util_base import StatsMakerBase from dv_apps.metrics.stats_view_base import StatsViewSwagger from dv_apps.metrics.stats_util_datasets import StatsMakerDatasets from dv_apps.metrics.stats_util_files import StatsMakerFiles # get_easy_dataset_category_counts miniset = StatsMakerBase() datasets = StatsMakerDatasets() files = StatsMakerFiles() print "Dataset count %s: \n" % datasets.get_easy_dataset_category_counts() print "Datasets count by month: \n %s" % datasets.get_easy_deposit_count_by_month( ).get_csv_content() print "Files by months: \n %s " % files.get_easy_file_downloads_by_month( ).get_csv_content() print "EASY categories: \n %s " % datasets.get_easy_dataset_category_counts( ) #.get_csv_content()
def view_public_visualizations(request, **kwargs):
    """
    Return HTML/D3Plus visualizations for a variety of public statistics

    The stats makers are configured from ``kwargs`` when any are given,
    otherwise from the request's GET parameters. Each series (and, where
    available, the SQL that produced it) is added to the template context
    only when its own query reports no error. The context is rendered with
    ``metrics/metrics_public.html``.
    """
    # kwargs override GET parameters
    params = kwargs if kwargs else request.GET.dict()
    stats_datasets = StatsMakerDatasets(**params)
    stats_dvs = StatsMakerDataverses(**params)
    stats_files = StatsMakerFiles(**params)

    # Start an OrderedDict
    resp_dict = OrderedDict()

    # -------------------------
    # Dataverses created each month
    # -------------------------
    stats_result_dv_counts = \
        stats_dvs.get_dataverse_counts_by_month_published()
    if not stats_result_dv_counts.has_error():
        resp_dict['dataverse_counts_by_month'] = list(
            stats_result_dv_counts.result_data['records'])
        resp_dict['dataverse_counts_by_month_sql'] = \
            stats_result_dv_counts.sql_query

    # -------------------------
    # Dataverse counts by type
    # -------------------------
    stats_result_dv_counts_by_type = \
        stats_dvs.get_dataverse_counts_by_type_published(
            exclude_uncategorized=True)
    if not stats_result_dv_counts_by_type.has_error():
        resp_dict['dataverse_counts_by_type'] = \
            stats_result_dv_counts_by_type.result_data['records']
        resp_dict['dv_counts_by_category_sql'] = \
            stats_result_dv_counts_by_type.sql_query

    # -------------------------
    # Datasets created each month
    # -------------------------
    stats_monthly_ds_counts = \
        stats_datasets.get_dataset_counts_by_create_date_published()
    if not stats_monthly_ds_counts.has_error():
        resp_dict['dataset_counts_by_month'] = list(
            stats_monthly_ds_counts.result_data['records'])
        resp_dict['dataset_counts_by_month_sql'] = \
            stats_monthly_ds_counts.sql_query

    # -------------------------
    # Datasets by subject
    # -------------------------
    stats_ds_count_by_subject = \
        stats_datasets.get_dataset_subject_counts_published()
    # Guard on the subject query itself. The previous code re-checked the
    # monthly-count result here, so a failed subject query was still read.
    if not stats_ds_count_by_subject.has_error():
        resp_dict['dataset_counts_by_subject'] = \
            stats_ds_count_by_subject.result_data['records']

    # -------------------------
    # Files created, by month
    # -------------------------
    stats_monthly_file_counts = \
        stats_files.get_file_count_by_month_published()
    if not stats_monthly_file_counts.has_error():
        resp_dict['file_counts_by_month'] = list(
            stats_monthly_file_counts.result_data['records'])
        resp_dict['file_counts_by_month_sql'] = \
            stats_monthly_file_counts.sql_query

    # -------------------------
    # Files downloaded, by month
    # -------------------------
    stats_monthly_downloads = \
        stats_files.get_file_downloads_by_month_published(
            include_pre_dv4_downloads=True)
    if not stats_monthly_downloads.has_error():
        resp_dict['file_downloads_by_month'] = list(
            stats_monthly_downloads.result_data['records'])
        resp_dict['file_downloads_by_month_sql'] = \
            stats_monthly_downloads.sql_query

    # rp: the "file counts by content type" chart has been removed from the
    # current charts, so no content-type series is added to the context.

    return render(request, 'metrics/metrics_public.html', resp_dict)