def _stats_top_dataset_downloads(self, org_id):
    """Return the top (up to 25) most-downloaded public datasets of an org.

    Download counts come from the cached jql (Mixpanel) report covering
    the last 24 weeks; dataset titles and URLs are resolved by querying
    Solr directly for the dataset ids found in that report.

    :param org_id: organization id used as the key into the jql report
    :returns: list of dicts with keys ``dataset_id``, ``name`` (title),
        ``url``, ``value`` (downloads for that dataset) and ``total``
        (sum of downloads over all of the org's datasets). Empty list when
        the org has no download data or the Solr query fails.
    """
    from ckan.lib.search.query import make_connection

    datasets_map = jql.downloads_per_organization_per_dataset_last_24_weeks_cached(
    ).get(org_id, {})
    total_downloads = sum(
        item.get('value') for item in datasets_map.values())

    data_dict = {
        'q': '*:*',
        'fl': 'id name title',
        # Restrict to public datasets that appear in the downloads report.
        'fq': 'capacity:"public" id:({})'.format(' OR '.join(
            datasets_map.keys())),
        'rows': len(datasets_map),
        'start': 0,
    }
    ret = []
    if datasets_map:
        # Sort by download count descending so islice(..., 25) really picks
        # the *top* 25 (matches the org_name-based variant of this method).
        sorted_datasets = sorted(datasets_map.values(),
                                 key=lambda item: item.get('value'),
                                 reverse=True)
        try:
            conn = make_connection(decode_dates=False)
            search_result = conn.search(**data_dict)
            dataset_meta_map = {
                d['id']: {
                    'title': d.get('title'),
                    'url': h.url_for('dataset_read', id=d.get('name'))
                }
                for d in search_result.docs
            }
            ret = [
                {
                    'dataset_id': d.get('dataset_id'),
                    'name': dataset_meta_map.get(
                        d.get('dataset_id'), {}).get('title'),
                    'url': dataset_meta_map.get(
                        d.get('dataset_id'), {}).get('url'),
                    'value': d.get('value'),
                    'total': total_downloads,
                }
                # Only datasets that are still public (present in Solr).
                for d in itertools.islice(
                    (ds for ds in sorted_datasets
                     if ds.get('dataset_id') in dataset_meta_map), 25)
            ]
        except Exception as e:
            log.warn('Error in searching solr {}'.format(str(e)))
    return ret
def _find_potential_datasets_in_solr(self):
    """Find datasets that have any recent pageviews or any downloads.

    Queries Solr for datasets with ``total_res_downloads >= 1`` or
    ``pageviews_last_14_days >= 1`` and collects their counters.

    :returns: dict mapping dataset id to a dict with ``pageviews``
        (last 14 days) and ``downloads`` (total resource downloads);
        empty dict when the Solr query fails.
    """
    from ckan.lib.search.query import make_connection

    dataset_meta_map = {}
    data_dict = {
        'q': '*:*',
        'fl': 'id pageviews_last_14_days total_res_downloads',
        'fq': 'total_res_downloads:[1 TO *] OR pageviews_last_14_days: [1 TO *]',
        'start': 0,
        # Effectively "all rows"; Solr requires an explicit upper bound.
        'rows': 1000000
    }
    try:
        conn = make_connection(decode_dates=False)
        search_result = conn.search(**data_dict)
        dataset_meta_map = {
            d['id']: {'pageviews': d.get('pageviews_last_14_days', 0),
                      'downloads': d.get('total_res_downloads', 0)}
            for d in search_result.docs
        }
    except Exception as e:
        # NOTE(review): sibling methods log via module-level `log`, this one
        # via `self.log` — confirm `self.log` exists on this class.
        self.log.warn('Error in searching solr {}'.format(str(e)))
    return dataset_meta_map
def _stats_top_dataset_downloads(self, org_id, org_name):
    """Return the top (up to 25) most-downloaded public datasets of an org.

    Download counts come from the cached jql (Mixpanel) report covering the
    last 24 weeks; dataset titles and names are resolved by fetching all of
    the organization's public datasets from Solr in one query.

    NOTE(review): this shares its name with the two-argument
    ``_stats_top_dataset_downloads(self, org_id)`` variant — if both are
    defined on the same class, the later definition shadows the earlier;
    confirm which one is intended to survive.

    :param org_id: organization id used as the key into the jql report
    :param org_name: organization name used in the Solr ``organization:``
        filter
    :returns: list of dicts with keys ``dataset_id``, ``name`` (title),
        ``url``, ``value`` (downloads) and ``total`` (sum of downloads over
        all of the org's datasets). Empty list when the org has no download
        data or the Solr query fails.
    """
    from ckan.lib.search.query import make_connection

    datasets_map = jql.downloads_per_organization_per_dataset_last_24_weeks_cached().get(
        org_id, {})
    total_downloads = sum(item.get('value') for item in datasets_map.values())

    data_dict = {
        'q': '*:*',
        'fl': 'id name title',
        'fq': 'capacity:"public" organization:{}'.format(org_name),
        'rows': 5000,  # Just setting a max, we need all public datasets that an org has
        'start': 0,
    }
    ret = []
    if datasets_map:
        # Sort by download count descending so islice(..., 25) picks the top 25.
        mp_datasets_sorted = sorted(datasets_map.values(),
                                    key=lambda item: item.get('value'),
                                    reverse=True)
        try:
            conn = make_connection(decode_dates=False)
            search_result = conn.search(**data_dict)
            dataset_meta_map = {
                d['id']: {
                    'title': d.get('title'),
                    'name': d.get('name'),
                }
                for d in search_result.docs
            }
            ret = [
                {
                    'dataset_id': d.get('dataset_id'),
                    'name': dataset_meta_map.get(
                        d.get('dataset_id'), {}).get('title'),
                    'url': h.url_for(
                        'dataset_read',
                        id=dataset_meta_map.get(
                            d.get('dataset_id'), {}).get('name')),
                    'value': d.get('value'),
                    'total': total_downloads,
                }
                # Only datasets that are still public (present in Solr).
                for d in itertools.islice(
                    (ds for ds in mp_datasets_sorted
                     if ds.get('dataset_id') in dataset_meta_map), 25)
            ]
        except Exception as e:
            log.warn('Error in searching solr {}'.format(str(e)))
    return ret