def test_0_check_setup(self):
    offset = '/api/rest/package'
    resB = self.app.get(offset).body
    resA = self.sub_app_get(offset)
    pkgsB = json.loads(resB or '[]')
    pkgsA = json.loads(resA or '[]')
    assert len(pkgsA) == 2
    assert len(pkgsB) == 0
def json_validator(value, context):
    if value == '':
        return value
    try:
        json.loads(value)
    except ValueError:
        raise Invalid('Invalid JSON')
    return value
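A hedged usage sketch for the validator above; Invalid is assumed to be ckan's navl Invalid error, and the empty dict stands in for a real validation context:

# Valid JSON (or the empty string) passes through untouched:
assert json_validator('{"a": 1}', {}) == '{"a": 1}'
assert json_validator('', {}) == ''

# Anything json.loads() rejects raises Invalid('Invalid JSON'):
try:
    json_validator('{not json', {})
except Invalid:
    pass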
def _get_resources(self, resType):
    ''' return id,uuid,title '''
    # todo: transform into a generator using paged retrieving in API
    url = '%s/api/%s/' % (self.baseurl, resType)
    log.info('Retrieving %s at GeoNode URL %s', resType, url)

    request = urllib2.Request(url)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(), urllib2.HTTPRedirectHandler())
    response = opener.open(request)
    content = response.read()

    json_content = json.loads(content)
    objects = json_content['objects']

    ret = []
    for layer in objects:
        lid = layer['id']
        luuid = layer['uuid']
        ltitle = layer['title']
        log.info('%s: found %s %s %s', resType, lid, luuid, ltitle)
        ret.append({'id': lid, 'uuid': luuid, 'title': ltitle})
    return ret
def duplicate_validator(key, data, errors, context):
    if errors[key]:
        return
    value = json.loads(data[key])
    unduplicated = list(set(value))
    data[key] = json.dumps(unduplicated)
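A hedged sketch of how this validator sees CKAN's flattened data dict; the ('tags_json',) key is made up for illustration, and note that going through set() removes duplicates but does not preserve order:

data = {('tags_json',): json.dumps(['a', 'b', 'a'])}
errors = {('tags_json',): []}
duplicate_validator(('tags_json',), data, errors, {})
print(json.loads(data[('tags_json',)]))   # ['a', 'b'] in arbitrary order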
def comuni():
    response_body = ''
    try:
        comuni = json.loads(response_body)
    except Exception, inst:
        msg = "Couldn't read response from comuni service %r: %s" % (response_body, inst)
        raise Exception(msg)
def get_package_dict(self, harvest_object):
    '''
    Constructs a package_dict suitable to be passed to package_create or
    package_update. If a dict is not returned by this function, the import
    stage will be cancelled.

    :param harvest_object: HarvestObject domain object (with access to job and source objects)
    :type harvest_object: HarvestObject

    :returns: A dataset dictionary (package_dict)
    :rtype: dict
    '''
    json_dict = json.loads(harvest_object.content)
    doc_type = json_dict[GEONODE_TYPE]

    if doc_type == GEONODE_LAYER_TYPE:
        return self.get_layer_package_dict(harvest_object, harvest_object.content)
    elif doc_type == GEONODE_MAP_TYPE:
        return self.get_map_package_dict(harvest_object, harvest_object.content)
    elif doc_type == GEONODE_DOC_TYPE:
        return self.get_doc_package_dict(harvest_object, harvest_object.content)
    else:
        log.error('Unknown GeoNode type %s' % doc_type)
        return None
def append_time_period(key, data, errors, context):
    if errors[key]:
        return
    value = data[key]
    if data.get(('time_period',), ''):
        out = json.loads(value)
        out.append(data[('time_period',)])
        data[key] = json.dumps(out)
def test_group_autocomplete(self):
    url = url_for(controller="api", action="group_autocomplete", ver=2)
    assert_equal(url, "/api/2/util/group/autocomplete")
    response = self.app.get(url=url, params={"q": u"dave"}, status=200)
    results = json.loads(response.body)
    assert_equal(len(results), 1)
    assert_equal(results[0]["name"], "david")
    assert_equal(results[0]["title"], "Dave's books")
    assert_equal(response.header("Content-Type"), "application/json;charset=utf-8")
def _set_source_config(self, config_str):
    '''
    Loads the source configuration JSON object into a dict for
    convenient access
    '''
    if config_str:
        self.source_config = json.loads(config_str)
        log.debug('Using config: %r' % self.source_config)
    else:
        self.source_config = {}
def test_simple_dump_json(self):
    json_filepath = '/tmp/dump.tmp'
    self.db.args = ('simple-dump-json %s' % json_filepath).split()
    self.db.simple_dump_json()
    assert os.path.exists(json_filepath), json_filepath
    f_obj = open(json_filepath, "r")
    rows = json.loads(f_obj.read())
    assert set(rows[0].keys()) > set(('id', 'name', 'title')), rows[0].keys()
    pkg_names = set(row['name'] for row in rows)
    assert 'annakarenina' in pkg_names, pkg_names
    assert 'warandpeace' not in pkg_names, pkg_names
def test_status(self):
    response = self.app.get(url=url_for(controller="api", action="status", ver=2),
                            params={}, status=200)
    res = json.loads(response.body)
    assert_equal(res["ckan_version"], __version__)
    assert_equal(res["site_url"], "http://test.ckan.net")
    assert_equal(res["site_title"], "CKAN")
    assert_equal(res["site_description"], "")
    assert_equal(res["locale_default"], "en")
    assert_equal(type(res["extensions"]), list)
    expected_extensions = set(("stats",))
    assert_equal(set(res["extensions"]), expected_extensions)
def load_licenses(self, license_url):
    try:
        response = urllib2.urlopen(license_url)
        response_body = response.read()
    except Exception as inst:
        msg = "Couldn't connect to licenses service %r: %s" % (license_url, inst)
        raise Exception(msg)
    try:
        license_data = json.loads(response_body)
    except Exception as inst:
        msg = "Couldn't read response from licenses service %r: %s" % (response_body, inst)
        raise Exception(msg)
    self._create_license_list(license_data, license_url)
def before_index(self, data_dict):
    data_dict.update({'data_type_facet': '',
                      'proj_facet': '',
                      'language_facet': '',
                      'encoding_facet': '',
                      'theme_keyword_facets': [],
                      'loc_keyword_facet': []})
    fields = helpers.get_field_choices('dataset')

    for field_name in ['data_type', 'proj', 'language', 'encoding']:
        value = data_dict.get(field_name)
        if value:
            data_dict[field_name + '_facet'] = fields[field_name][value]

    if data_dict.get('theme_keyword'):
        data_dict['theme_keyword_facets'] = json.loads(data_dict.get('theme_keyword'))

    # For old schema definition
    for i in range(5):
        field_name = 'theme_keyword_' + str(i + 1)
        if isinstance(data_dict.get(field_name), unicode):
            data_dict['theme_keyword_facets'].append(fields['theme_keyword'].get(data_dict[field_name]))

    if data_dict.get('loc_keyword'):
        data_dict['loc_keyword_facet'] = json.loads(data_dict.get('loc_keyword'))
        if isinstance(data_dict['loc_keyword_facet'], list):
            data_dict['loc_keyword_facet'] = [fields['loc_keyword'][loc_keyword]
                                              for loc_keyword in filter(None, data_dict['loc_keyword_facet'])]
        # For old schema definition
        elif isinstance(data_dict['loc_keyword_facet'], int):
            data_dict['loc_keyword_facet'] = fields['loc_keyword'][str(data_dict['loc_keyword'])]

    return data_dict
def _get_request_data(try_url_params=False):
    u'''Returns a dictionary, extracted from a request.

    If there is no data, None or "" is returned.
    ValueError will be raised if the data is not a JSON-formatted dict.

    The data is retrieved as a JSON-encoded dictionary from the request
    body.  Or, if the `try_url_params` argument is True and the request is
    a GET request, then an attempt is made to read the data from the url
    parameters of the request.

    try_url_params
        If try_url_params is False, then the data_dict is read from the
        request body.

        If try_url_params is True and the request is a GET request then the
        data is read from the url parameters.  The resulting dict will only
        be 1 level deep, with the url-param fields being the keys.  If a
        single key has more than one value specified, then the value will
        be a list of strings, otherwise just a string.
    '''
    def mixed(multi_dict):
        u'''Return a dict with values being lists if they have more than one
        item or a string otherwise
        '''
        out = {}
        for key, value in multi_dict.to_dict(flat=False).iteritems():
            out[key] = value[0] if len(value) == 1 else value
        return out

    if not try_url_params and request.method == u'GET':
        raise ValueError(u'Invalid request. Please use POST method '
                         'for your request')

    request_data = {}
    if request.method in [u'POST', u'PUT'] and request.form:
        if (len(request.form.values()) == 1 and
                request.form.values()[0] in [u'1', u'']):
            try:
                request_data = json.loads(request.form.keys()[0])
            except ValueError, e:
                raise ValueError(
                    u'Error decoding JSON data. '
                    'Error: %r '
                    'JSON data extracted from the request: %r' %
                    (e, request_data))
        else:
            request_data = mixed(request.form)
def test_group_autocomplete(self):
    url = url_for(controller='api', action='group_autocomplete', ver=2)
    assert_equal(url, '/api/2/util/group/autocomplete')
    response = self.app.get(
        url=url,
        params={
            'q': u'dave',
        },
        status=200,
    )
    results = json.loads(response.body)
    assert_equal(len(results), 1)
    assert_equal(results[0]['name'], 'david')
    assert_equal(results[0]['title'], 'Dave\'s books')
    assert_equal(response.header('Content-Type'),
                 'application/json;charset=utf-8')
def test_status(self):
    response = self.app.get(
        url=url_for(controller='api', action='status', ver=2),
        params={},
        status=200,
    )
    res = json.loads(response.body)
    assert_equal(res['ckan_version'], __version__)
    assert_equal(res['site_url'], 'http://test.ckan.net')
    assert_equal(res['site_title'], 'CKAN')
    assert_equal(res['site_description'], '')
    assert_equal(res['locale_default'], 'en')
    assert_equal(type(res['extensions']), list)
    expected_extensions = set(('stats',))
    assert_equal(set(res['extensions']), expected_extensions)
def addHighlightedText(self, highlighting_dict, results):
    '''
    This function adds the highlighted text returned by the solr search
    to package extras.
    '''
    if results:
        for result in results:
            id = result['index_id']
            package_dict = json.loads(result['data_dict'])
            if id in highlighting_dict.keys():
                # if len(highlighting_dict[id]) > 0:
                package_dict['extras'].append({'value': highlighting_dict[id],
                                               'key': 'highlighting'})
                result['data_dict'] = json.dumps(package_dict)
    return results
def load_licenses(self, license_url):
    try:
        response = urllib2.urlopen(license_url)
        response_body = response.read()
    except Exception as inst:
        msg = "Couldn't connect to licenses service %r: %s" % (license_url, inst)
        raise Exception(msg)
    try:
        license_data = json.loads(response_body)
    except Exception as inst:
        msg = "Couldn't read response from licenses service %r: %s" % (response_body, inst)
        raise Exception(msg)
    for license in license_data:
        if isinstance(license, string_types):
            license = license_data[license]
        if license.get('title'):
            license['title'] = _(license['title'])
    self._create_license_list(license_data, license_url)
def filtered_download(self, resource_view_id):
    params = json.loads(request.params['params'])
    resource_view = get_action(u'resource_view_show')(
        None, {u'id': resource_view_id})

    search_text = text_type(params['search']['value'])
    view_filters = resource_view.get(u'filters', {})
    user_filters = text_type(params['filters'])
    filters = merge_filters(view_filters, user_filters)

    datastore_search = get_action(u'datastore_search')
    unfiltered_response = datastore_search(None, {
        u"resource_id": resource_view[u'resource_id'],
        u"limit": 0,
        u"filters": view_filters,
    })

    cols = [f['id'] for f in unfiltered_response['fields']]
    if u'show_fields' in resource_view:
        cols = [c for c in cols if c in resource_view['show_fields']]

    sort_list = []
    for order in params['order']:
        sort_by_num = int(order['column'])
        sort_order = (u'desc' if order['dir'] == u'desc' else u'asc')
        sort_list.append(cols[sort_by_num] + u' ' + sort_order)

    cols = [c for (c, v) in zip(cols, params['visible']) if v]

    h.redirect_to(
        h.url_for(
            controller=u'ckanext.datastore.controller:DatastoreController',
            action=u'dump',
            resource_id=resource_view[u'resource_id'])
        + u'?' + urlencode({
            u'q': search_text,
            u'sort': u','.join(sort_list),
            u'filters': json.dumps(filters),
            u'format': request.params['format'],
            u'fields': u','.join(cols),
        }))
def validate_config(self, source_config):
    if not source_config:
        return source_config

    log = logging.getLogger(__name__ + '.geonode.config')

    try:
        source_config_obj = json.loads(source_config)

        # GeoNode does not expose the internal GeoServer URL, so we have to config it on its own
        if CONFIG_GEOSERVERURL not in source_config_obj:
            raise ValueError('geoserver_url is mandatory')
        if not isinstance(source_config_obj[CONFIG_GEOSERVERURL], basestring):
            raise ValueError('geoserver_url should be a string')

        if 'import_wfs_as_csv' in source_config_obj:
            if source_config_obj['import_wfs_as_csv'] not in ['true', 'false']:
                raise ValueError('import_wfs_as_csv should be either true or false')

        if 'import_wfs_as_wfs' in source_config_obj:
            if not isinstance(source_config_obj['import_wfs_as_wfs'], bool):
                raise ValueError('import_wfs_as_wfs should be either true or false')

        if CONFIG_IMPORT_FIELDS in source_config_obj:
            if not isinstance(source_config_obj[CONFIG_IMPORT_FIELDS], list):
                raise ValueError('%s should be a list' % CONFIG_IMPORT_FIELDS)

        self.checkMapping(CONFIG_KEYWORD_MAPPING, source_config_obj, list)
        self.checkMapping(CONFIG_GROUP_MAPPING, source_config_obj, basestring)

        if CONFIG_GROUP_MAPPING in source_config_obj and CONFIG_GROUP_MAPPING_FIELDNAME not in source_config_obj:
            raise ValueError('%s needs also %s to be defined'
                             % (CONFIG_GROUP_MAPPING, CONFIG_GROUP_MAPPING_FIELDNAME))

    except ValueError as e:
        log.warning("Config parsing error: %r", e)
        raise e

    return source_config
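For illustration, a config blob that should pass the checks above; the key names are inferred from the error messages (the CONFIG_* constant values are assumptions):

example_config = json.dumps({
    "geoserver_url": "http://localhost:8080/geoserver",  # mandatory, must be a string
    "import_wfs_as_csv": "true",                         # must be the string 'true' or 'false'
    "import_wfs_as_wfs": True,                           # must be a real boolean
})
# harvester.validate_config(example_config) would return the string unchanged.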
def load_json(obj, **kw): return json.loads(obj, **kw)
def resource_upload_datastore(context, data_dict):
    print data_dict
    id_risorsa = data_dict['id']
    # print id_risorsa
    url = id_risorsa

    import unicodedata
    import csv

    url = unicodedata.normalize('NFKD', url).encode('ascii', 'ignore')  # str: url of the csv
    path1 = url[:3]
    path2 = url[3:6]
    filename = url[6:]

    # storage_path = '/opt/ckan/default/storage/resources'
    storage_path = '/var/lib/ckan/default/resources'  # ckandemo
    input_file = os.path.join(storage_path, path1, path2, filename)
    # print input_file + "\n\n"

    csvfile = open(input_file, 'rU')
    reader = csv.DictReader(csvfile)

    # Create a list and add the rows of the csv to it
    json_list = []
    i = 0
    for row in reader:
        for elem in row:
            row[elem] = str(row[elem])
            # print str(row[elem])
        json_list.append(row)
        print row
        i = i + 1
        if i == 2:
            break

    upload_host = 'datacatalog.regione.emilia-romagna.it/catalogCTA'
    upload_api = '05320953-abbc-4217-a0c3-4175f627828d'
    # host = '10.5.16.82:5000'  # ckandemo
    # idUtente = '6d0fccec-859a-4b99-995c-a026b0d5e1f3'

    requesta = urllib2.Request('https://' + upload_host + '/api/action/datastore_create')
    requesta.add_header('Authorization', upload_api)

    id_risorsa2 = data_dict['id2']
    print id_risorsa
    print id_risorsa2

    store_dict = {
        'force': True,
        'resource_id': id_risorsa2,
        'records': json_list
    }
    data_string = urllib2.quote(json.dumps(store_dict))
    print data_string

    # Make the HTTP request.
    response = urllib2.urlopen(requesta, data_string.encode('utf8')).read()
    response_dict = json.loads(response.decode('utf-8'))

    # Check if TRUE
    assert response_dict['success'] is True
    print response_dict['success']

    """
    #json_list = json.dumps(json.loads(json_list))
    #json_list.replace("'",'"')
    #json_list = json.loads(json_list)
    print json_list
    print type(json_list)
    store_dict = {"force": True, "resource_id": id_risorsa, "records": json_list}
    print 'before datastore create\n'
    try:
        query = get_action('datastore_create')(context, store_dict)  # DATASTORE CREATE
    except:
        print '\nERRORE in datastore create! =('
    """

    print "end package.py/datastore\n\n"
def _getjson(self): return json.loads(self.body)
def _get_request_data(try_url_params=False):
    u'''Returns a dictionary, extracted from a request.

    If there is no data, None or "" is returned.
    ValueError will be raised if the data is not a JSON-formatted dict.

    The data is retrieved as a JSON-encoded dictionary from the request
    body.  Or, if the `try_url_params` argument is True and the request is
    a GET request, then an attempt is made to read the data from the url
    parameters of the request.

    try_url_params
        If try_url_params is False, then the data_dict is read from the
        request body.

        If try_url_params is True and the request is a GET request then the
        data is read from the url parameters.  The resulting dict will only
        be 1 level deep, with the url-param fields being the keys.  If a
        single key has more than one value specified, then the value will
        be a list of strings, otherwise just a string.
    '''
    def mixed(multi_dict):
        u'''Return a dict with values being lists if they have more than one
        item or a string otherwise
        '''
        out = {}
        for key, value in multi_dict.to_dict(flat=False).iteritems():
            out[key] = value[0] if len(value) == 1 else value
        return out

    if not try_url_params and request.method == u'GET':
        raise ValueError(u'Invalid request. Please use POST method '
                         'for your request')

    request_data = {}
    if request.method in [u'POST', u'PUT'] and request.form:
        if (len(request.form.values()) == 1 and
                request.form.values()[0] in [u'1', u'']):
            try:
                request_data = json.loads(request.form.keys()[0])
            except ValueError as e:
                raise ValueError(
                    u'Error decoding JSON data. '
                    'Error: %r '
                    'JSON data extracted from the request: %r' %
                    (e, request_data))
        else:
            request_data = mixed(request.form)
    elif request.args and try_url_params:
        request_data = mixed(request.args)
    elif (request.data and request.data != u'' and
            request.content_type != u'multipart/form-data'):
        try:
            request_data = request.get_json()
        except BadRequest as e:
            raise ValueError(u'Error decoding JSON data. '
                             'Error: %r '
                             'JSON data extracted from the request: %r' %
                             (e, request_data))
    if not isinstance(request_data, dict):
        raise ValueError(u'Request data JSON decoded to %r but '
                         'it needs to be a dictionary.' % request_data)

    if request.method == u'PUT' and not request_data:
        raise ValueError(u'Invalid request. Please use the POST method for '
                         'your request')
    for field_name, file_ in request.files.iteritems():
        request_data[field_name] = file_
    log.debug(u'Request data extracted: %r', request_data)

    return request_data
def loads(self, chars):
    try:
        return json.loads(chars)
    except ValueError, inst:
        raise Exception, "Couldn't loads string '%s': %s" % (chars, inst)
def process_result_value(self, value, engine):
    if value is None:
        return None
    else:
        return json.loads(value)
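For context, a hedged sketch of the kind of SQLAlchemy TypeDecorator this method usually belongs to; the class name and the impl column type are assumptions:

import json
from sqlalchemy import types

class JsonType(types.TypeDecorator):
    '''Store a Python structure as a JSON string in a text column (sketch).'''
    impl = types.UnicodeText

    def process_bind_param(self, value, engine):
        # serialise on the way into the database
        return None if value is None else json.dumps(value)

    def process_result_value(self, value, engine):
        # parse on the way out, as in the snippet above
        return None if value is None else json.loads(value)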
def read_data(self, data=None):
    errors = ""
    data = data or clean_dict(dict_fns.unflatten(tuplize_dict(parse_params(request.POST))))
    save_action = request.params.get('save')
    print(data)

    cat_url = data['cat_url']
    if cat_url == "":
        errors = errors + "Catalogue Url field should not be empty." + '\n'
    if 'http' not in cat_url:
        errors = errors + "Invalid Catalogue Url." + '\n'

    datasets_list_url = data['datasets_list_url']
    if datasets_list_url == "":
        errors = errors + "Datasets List Url field should not be empty." + '\n'

    dataset_url = data['dataset_url']
    # if dataset_url == "":
    #     errors = errors + "Dataset Url field should not be empty." + '\n'

    datasets_list_identifier = data['datasets_list_identifier']
    if datasets_list_identifier == "":
        errors = errors + "Datasets List Json Key field should not be empty." + '\n'

    dataset_id = data['dataset_id']
    if dataset_id == "":
        errors = errors + "Dataset Id Json Key field should not be empty." + '\n'

    apikey = data['apikey']
    print(apikey)

    metadata_mappings = data['metadata_mappings']
    try:
        metadata_mappings = json.loads(metadata_mappings)
    except:
        errors = errors + "Invalid Json!" + '\n'

    vars = {'data': data, 'errors': str(errors)}

    catalogues_description = str(data['catalogues_description'])
    catalogue_country = str(data['catalogue_country'])
    catalogue_language = str(data['language'])
    catalogue_title = str(data['catalogue_title'])
    harvest_frequency = str(data['harvest_frequency'])
    name = catalogue_title.replace('.', '-').replace(' ', '-').replace('_', '-').replace('(', '-').replace(')', '-').replace('[', '-').replace(']', '-').replace(',', '-').replace(':', '-')
    package_id = cat_url

    config = '{"read_only": true, "default_tags": [], "remote_groups": "only_local", "remote_orgs": "create", "default_extras": {"harvest_url": "{harvest_source_url}/dataset/{dataset_id}", "new_extra": "Test"}, "default_groups": ["french"], "user": "******", "api_key": "<REMOTE_API_KEY>", "override_extras": true, "api_version": 1}'
    config = json.loads(config)

    dataset_dict = {
        'name': str(name),
        'id': str(uuid.uuid3(uuid.NAMESPACE_OID, str(cat_url))),
        'frequency': str(harvest_frequency),
        'url': str(cat_url),
        'title': str(catalogue_title),
        'package_id': str(package_id),
        'source_type': 'genericapi',
        'notes': str(catalogues_description),
        'catalogue_country': str(catalogue_country),
        'language': str(catalogue_language),
        'catalogue_date_created': '',
        'catalogue_date_updated': '',
        'metadata_mappings': json.dumps(metadata_mappings),
        "datasets_list_url": str(datasets_list_url),
        "dataset_url": str(dataset_url),
        "datasets_list_identifier": str(datasets_list_identifier),
        "dataset_id": str(dataset_id),
        "apikey": str(apikey)
    }
    print(dataset_dict)
    # AddResourceToCkan.AddResourceToCkan(dataset_dict)

    context = {'model': model, 'session': Session, 'user': u'admin', 'message': '', 'save': True}
    try:
        harvest_source_create(context, dataset_dict)
    except toolkit.ValidationError as ex:
        print(ex)
        vars = {'data': data, 'errors': str(ex)}
        # return render('htmlharvest1.html', extra_vars=vars)
        return render('generic.html', extra_vars=vars)

    dataset_dict.update({'config': str(metadata_mappings)})
    return render('read.html', extra_vars=vars)
query['defType'] = 'dismax'
query['tie'] = '0.1'
# this minimum match is explained
# http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
query['mm'] = '2<-1 5<80%'
query['qf'] = query.get('qf', QUERY_FIELDS)

conn = make_connection()
log.debug('Package query: %r' % query)
try:
    solr_response = conn.raw_query(**query)
except SolrException, e:
    raise SearchError('SOLR returned an error running query: %r Error: %r' %
                      (query, e.reason))
try:
    data = json.loads(solr_response)
    response = data['response']
    self.count = response.get('numFound', 0)
    self.results = response.get('docs', [])

    # #1683 Filter out the last row that is sometimes out of order
    self.results = self.results[:rows_to_return]

    # get any extras and add to 'extras' dict
    for result in self.results:
        extra_keys = filter(lambda x: x.startswith('extras_'), result.keys())
        extras = {}
        for extra_key in extra_keys:
            value = result.pop(extra_key)
            extras[extra_key[len('extras_'):]] = value
        if extra_keys:
def geojson_to_wkt(value): return wkt.dumps(json.loads(value))
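A hedged usage sketch, assuming the wkt module used above is geomet.wkt (GeoJSON string in, WKT string out):

point_geojson = '{"type": "Point", "coordinates": [11.0, 45.0]}'
print(geojson_to_wkt(point_geojson))   # e.g. 'POINT (11.0 45.0)'; exact precision depends on wkt.dumps defaults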
def get_recommended_datasets(pkg_id):
    package = toolkit.get_action('package_show')(None, {'id': pkg_id.strip()})
    response_data = {}
    RtpaApi = cf.get(
        'ckan.extensions.rtpa_tet_dataset_automatic_recommendations.rtpa_api',
        False)

    if "linked_datasets" in package and package["linked_datasets"] != "":
        l = []
        pkgs = package["linked_datasets"].split(",")
        for pkg in pkgs:
            # log.debug("PKG_ID:" + pkg_id)
            # log.debug("type of:" + str(type(pkg_id)))
            p = toolkit.get_action('package_show')(None, {'id': pkg})
            item = {}
            item["name"] = pkg
            item["title"] = p["title"]
            item["notes"] = p["notes"]
            l.append(item)
        response_data["datasets"] = l

    if RtpaApi:
        relateddatasets = []
        url = RtpaApi + package["id"] + "/3"
        try:
            data = json.loads(urllib2.urlopen(url).read())
        except Exception as e:
            print(e)
        i = 0
        for element in data['result']:
            item = {}
            item["name"] = element['id']
            item["title"] = element['title']
            item["notes"] = element['notes']
            relateddatasets.append(item)
            i += 1
            if (i == 10):
                break
        response_data["datasets"] = relateddatasets
    else:
        q = ''
        category_string = ''
        taget_audience_string = ''
        if "category" in package and not package["category"] == "":
            category_string = "category:\"" + package["category"] + "\"~25"
        if "target_audience" in package and not package["target_audience"] == "":
            taget_audience_string = "target_audience:\"" + package["target_audience"] + "\"~25"
        if (category_string and taget_audience_string):
            q = category_string + " OR " + taget_audience_string
        elif (category_string):
            q = category_string
        elif (taget_audience_string):
            q = taget_audience_string
        data_dict = {
            'qf': 'target_audience^4 category^4 name^4 title^4 tags^2 groups^2 text',
            'q': q,
            'rows': 5
        }
        # log.debug(q)
        response_data["datasets"] = toolkit.get_action('package_search')(
            None, data_dict)["results"]

    for ds in response_data["datasets"]:
        if ds["name"] == pkg_id:
            response_data["datasets"].remove(ds)
    return response_data
def validate(self, context, data_dict, schema, action):
    """
    Validate and convert for package_create, package_update and
    package_show actions.
    """
    thing, action_type = action.split('_')
    t = data_dict.get('type')
    if not t or t not in self._schemas:
        return data_dict, {
            'type': ["Unsupported dataset type: {t}".format(t=t)]
        }

    scheming_schema = self._expanded_schemas[t]

    before = scheming_schema.get('before_validators')
    after = scheming_schema.get('after_validators')
    if action_type == 'show':
        get_validators = _field_output_validators
        before = after = None
    elif action_type == 'create':
        get_validators = _field_create_validators
    else:
        get_validators = _field_validators

    if before:
        schema['__before'] = validation.validators_from_string(
            before, None, scheming_schema)
    if after:
        schema['__after'] = validation.validators_from_string(
            after, None, scheming_schema)

    fg = ((scheming_schema['dataset_fields'], schema, True),
          (scheming_schema['resource_fields'], schema['resources'], False))

    composite_convert_fields = []
    for field_list, destination, convert_extras in fg:
        for f in field_list:
            convert_this = convert_extras and f['field_name'] not in schema
            destination[f['field_name']] = get_validators(
                f, scheming_schema, convert_this)
            if convert_this and 'repeating_subfields' in f:
                composite_convert_fields.append(f['field_name'])

    def composite_convert_to(key, data, errors, context):
        unflat = unflatten(data)
        for f in composite_convert_fields:
            if f not in unflat:
                continue
            data[(f,)] = json.dumps(unflat[f], default=lambda x: None if x == missing else x)
            convert_to_extras((f,), data, errors, context)
            del data[(f,)]

    if action_type == 'show':
        if composite_convert_fields:
            for ex in data_dict['extras']:
                if ex['key'] in composite_convert_fields:
                    data_dict[ex['key']] = json.loads(ex['value'])
            data_dict['extras'] = [
                ex for ex in data_dict['extras']
                if ex['key'] not in composite_convert_fields
            ]
    else:
        dataset_composite = {
            f['field_name']
            for f in scheming_schema['dataset_fields']
            if 'repeating_subfields' in f
        }
        if dataset_composite:
            expand_form_composite(data_dict, dataset_composite)
        resource_composite = {
            f['field_name']
            for f in scheming_schema['resource_fields']
            if 'repeating_subfields' in f
        }
        if resource_composite and 'resources' in data_dict:
            for res in data_dict['resources']:
                expand_form_composite(res, resource_composite)
        # convert composite package fields to extras so they are stored
        if composite_convert_fields:
            schema = dict(
                schema,
                __after=schema.get('__after', []) + [composite_convert_to])

    return navl_validate(data_dict, schema, context)
def search(self):
    from ckan.lib.search import SearchError, SearchQueryError

    package_type = self._guess_package_type()

    try:
        context = {'model': model, 'user': c.user,
                   'auth_user_obj': c.userobj}
        check_access('site_read', context)
    except NotAuthorized:
        abort(403, _('Not authorized to see this page'))

    # unicode format (decoded from utf8)
    q = c.q = request.params.get('q', u'')
    org = request.params.get('organization', None)
    c.query_error = False
    page = h.get_page_number(request.params)

    try:
        limit = int(config.get('ckan.datasets_per_page', 20))
    except:
        limit = 20

    # most search operations should reset the page counter:
    params_nopage = [(k, v) for k, v in request.params.items()
                     if k != 'page']

    def drill_down_url(alternative_url=None, **by):
        return h.add_url_param(alternative_url=alternative_url,
                               controller='package', action='search',
                               new_params=by)

    c.drill_down_url = drill_down_url

    def remove_field(key, value=None, replace=None):
        return h.remove_url_param(key, value=value, replace=replace,
                                  controller='package', action='search',
                                  alternative_url=package_type)

    c.remove_field = remove_field

    sort_by = request.params.get('sort', None)
    params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

    def _sort_by(fields):
        """
        Sort by the given list of fields.

        Each entry in the list is a 2-tuple: (fieldname, sort_order)
        eg - [('metadata_modified', 'desc'), ('name', 'asc')]
        If fields is empty, then the default ordering is used.
        """
        params = params_nosort[:]
        if fields:
            sort_string = ', '.join('%s %s' % f for f in fields)
            params.append(('sort', sort_string))
        return search_url(params, package_type)

    c.sort_by = _sort_by

    if not sort_by:
        c.sort_by_fields = []
    else:
        c.sort_by_fields = [field.split()[0] for field in sort_by.split(',')]

    def pager_url(q=None, page=None):
        params = list(params_nopage)
        params.append(('page', page))
        return search_url(params, package_type)

    c.search_url_params = urlencode(_encode_params(params_nopage))

    try:
        c.fields = []
        # c.fields_grouped will contain a dict of params containing
        # a list of values eg {'tags':['tag1', 'tag2']}
        c.fields_grouped = {}
        search_extras = {}
        fq = ''
        for (param, value) in request.params.items():
            if param not in ['q', 'page', 'sort'] \
                    and len(value) and not param.startswith('_'):
                if not param.startswith('ext_'):
                    c.fields.append((param, value))
                    fq += ' %s:"%s"' % (param, value)
                    if param not in c.fields_grouped:
                        c.fields_grouped[param] = [value]
                    else:
                        c.fields_grouped[param].append(value)
                else:
                    search_extras[param] = value

        context = {'model': model, 'session': model.Session,
                   'user': c.user, 'for_view': True,
                   'auth_user_obj': c.userobj}

        # Unless changed via config options, don't show other dataset
        # types any search page. Potential alternatives are do show them
        # on the default search page (dataset) or on one other search page
        search_all_type = config.get('ckan.search.show_all_types', 'dataset')
        search_all = False
        map_results = None

        try:
            # If the "type" is set to True or False, convert to bool
            # and we know that no type was specified, so use traditional
            # behaviour of applying this only to dataset type
            search_all = asbool(search_all_type)
            search_all_type = 'dataset'
        # Otherwise we treat as a string representing a type
        except ValueError:
            search_all = True

        if not package_type:
            package_type = 'dataset'

        if not search_all or package_type != search_all_type:
            # Only show datasets of this particular type
            fq += ' +dataset_type:{type}'.format(type=package_type)

        facets = OrderedDict()

        default_facet_titles = {
            'organization': _('Organizations'),
            'groups': _('Groups'),
            'tags': _('Tags'),
            'res_format': _('Formats'),
            'license_id': _('Licenses'),
        }

        for facet in h.facets():
            if facet in default_facet_titles:
                facets[facet] = default_facet_titles[facet]
            else:
                facets[facet] = facet

        # Facet titles
        for plugin in p.PluginImplementations(p.IFacets):
            facets = plugin.dataset_facets(facets, package_type)

        c.facet_titles = facets

        data_dict = {
            'q': q,
            'fq': fq.strip(),
            'facet.field': facets.keys(),
            'rows': limit,
            'start': (page - 1) * limit,
            'sort': sort_by,
            'extras': search_extras,
            'include_private': asbool(config.get('ckan.search.default_include_private', True)),
        }

        query = get_action('package_search')(context, data_dict)

        # loop the search query and get all the results
        # this workaround the 1000 rows solr hard limit
        HARD_LIMIT = 1000
        conn = get_connection_redis()
        pager = 0
        # crank up the pager limit high as using points cluster for better performance
        PAGER_LIMIT = 10000

        data_dict_full_result = {
            'q': q,
            'fq': fq.strip(),
            'facet.field': facets.keys(),
            'rows': HARD_LIMIT,
            'start': 0,
            'sort': sort_by,
            'extras': search_extras,
            'include_private': asbool(config.get('ckan.search.default_include_private', True)),
        }

        if not org:
            # if no q, it is an init load or direct visit on /dataset
            log.info('### Not org ###')
            if not q:
                log.info('### Not q ###')
                if not conn.exists('redis_full_results'):
                    log.info('### generating full results ###')
                    # get full results and add to redis when there is not full results in redis
                    full_results = self.get_full_results(
                        context, data_dict_full_result, pager, PAGER_LIMIT,
                        q, fq, facets, HARD_LIMIT, sort_by, search_extras)
                    map_results = self.get_map_result(full_results)
                    # adding to redis
                    log.info('adding full results to redis')
                    conn.set('redis_full_results', json.dumps(map_results))
                    # log.info(c.full_results)
                else:
                    log.info('### using cached full results ###')
                    map_results = json.loads(conn.get('redis_full_results'))
                    # log.info(c.full_results)
            else:
                log.info('### With q ###')
                full_results = self.get_full_results(
                    context, data_dict_full_result, pager, PAGER_LIMIT,
                    q, fq, facets, HARD_LIMIT, sort_by, search_extras)
                map_results = self.get_map_result(full_results)
        else:
            log.info('### With org ###')
            if not q:
                log.info('### Not q ###')
                if not conn.exists('redis_full_results_%s' % org):
                    log.info('### generating %s results ###' % org)
                    # get full results and add to redis when there is not full results in redis
                    full_results = self.get_full_results(
                        context, data_dict_full_result, pager, PAGER_LIMIT,
                        q, fq, facets, HARD_LIMIT, sort_by, search_extras)
                    map_results = self.get_map_result(full_results)
                    # adding to redis
                    log.info('adding %s results to redis' % org)
                    conn.set('redis_full_results_%s' % org, json.dumps(map_results))
                    # log.info(c.full_results)
                else:
                    log.info('### using cached %s results ###' % org)
                    map_results = json.loads(conn.get('redis_full_results_%s' % org))
                    # log.info(c.full_results)
            else:
                log.info('### With q ###')
                full_results = self.get_full_results(
                    context, data_dict_full_result, pager, PAGER_LIMIT,
                    q, fq, facets, HARD_LIMIT, sort_by, search_extras)
                map_results = self.get_map_result(full_results)
                # log.info(c.full_results)

        c.sort_by_selected = query['sort']

        c.page = h.Page(collection=query['results'],
                        page=page,
                        url=pager_url,
                        item_count=query['count'],
                        items_per_page=limit)
        c.search_facets = query['search_facets']
        c.page.items = query['results']
    except SearchQueryError as se:
        # User's search parameters are invalid, in such a way that is not
        # achievable with the web interface, so return a proper error to
        # discourage spiders which are the main cause of this.
        log.info('Dataset search query rejected: %r', se.args)
        abort(400, _('Invalid search query: {error_message}').format(
            error_message=str(se)))
    except SearchError as se:
        # May be bad input from the user, but may also be more serious like
        # bad code causing a SOLR syntax error, or a problem connecting to
        # SOLR
        log.error('Dataset search error: %r', se.args)
        c.query_error = True
        c.search_facets = {}
        c.page = h.Page(collection=[])
    except NotAuthorized:
        abort(403, _('Not authorized to see this page'))

    c.search_facets_limits = {}
    for facet in c.search_facets.keys():
        try:
            limit = int(request.params.get(
                '_%s_limit' % facet,
                int(config.get('search.facets.default', 10))))
        except ValueError:
            abort(400, _('Parameter "{parameter_name}" is not '
                         'an integer').format(parameter_name='_%s_limit' % facet))
        c.search_facets_limits[facet] = limit

    self._setup_template_variables(context, {}, package_type=package_type)

    return render(self._search_template(package_type),
                  extra_vars={'dataset_type': package_type,
                              'map_results': map_results})
def loads(self, chars):
    try:
        return json.loads(chars)
    except ValueError as inst:
        raise Exception("Couldn't loads string '%s': %s" % (chars, inst))
def fetch_stage(self, harvest_object):
    # Check harvest object status
    status = self._get_object_extra(harvest_object, 'status')

    if status == 'delete':
        # No need to fetch anything, just pass to the import stage
        return True

    log = logging.getLogger(__name__ + '.GeoNode.fetch')
    log.debug('GeoNodeHarvester fetch_stage for object: %s', harvest_object.id)

    url = harvest_object.source.url
    client = GeoNodeClient(url)

    guid = harvest_object.guid
    content = harvest_object.content

    obj = json.loads(content)
    gnid = obj['id']

    if 'type' not in obj:
        log.error("Bad content in harvest object ID: %d GUID: %s [%s]" % (gnid, guid, content))
        if GEONODE_TYPE in obj:
            # it means it already contains data read in this fetch stage.
            # We were expecting info from the gather stage instead
            log.warning("Harvest object is in the wrong state ID: %d GUID: %s" % (gnid, guid))
        self._save_object_error("Bad content in harvest object ID: %d GUID: %s [%s]" % (gnid, guid, content),
                                harvest_object)
        return False

    objtype = obj['type']

    try:
        if objtype == GEONODE_LAYER_TYPE:
            georesource_json = client.get_layer_json(gnid)
            objdict = json.loads(georesource_json)
        elif objtype == GEONODE_MAP_TYPE:
            georesource_json = client.get_map_json(gnid)
            objdict = json.loads(georesource_json)
            # set into the map object the geoexp configuration blob
            map_blob_json = client.get_map_data(gnid)
            # enriching the json with some more info
            objdict['MAP_DATA'] = map_blob_json
        elif objtype == GEONODE_DOC_TYPE:
            georesource_json = client.get_doc_json(gnid)
            objdict = json.loads(georesource_json)
        else:
            log.error("Unknown GeoNode resource type %s for ID: %d GUID: %s " % (objtype, gnid, guid))
            self._save_object_error("Unknown GeoNode resource type %s for ID: %d GUID: %s " % (objtype, gnid, guid),
                                    harvest_object)
            return False

        objdict[GEONODE_TYPE] = objtype
        final_json = json.dumps(objdict)
    except Exception as e:
        log.error('Error getting GeoNode %s ID %d GUID %s [%r]' % (objtype, gnid, guid, e), e)
        self._save_object_error('Error getting GeoNode %s ID %d GUID %s [%r]' % (objtype, gnid, guid, e),
                                harvest_object)
        return False

    if final_json is None:
        self._save_object_error('Empty record for GUID %s type %s' % (guid, objtype), harvest_object)
        return False

    try:
        harvest_object.content = final_json.strip()
        harvest_object.save()
    except Exception as e:
        self._save_object_error('Error saving the harvest object for GUID %s type %s [%r]' % (guid, objtype, e),
                                harvest_object)
        return False

    log.debug('JSON content saved for %s (size %s)' % (objtype, len(final_json)))
    return True
def __init__(self, json_string): self._dict = json.loads(json_string)
def column_summary(self, url, field_id):
    try:
        data = json.loads(urllib2.urlopen(url).read())
        temp_data = json_normalize(data["result"]["records"])
        fields = data["result"]["fields"]  # type_unified TODO
        record_count = 0
        results = {
            "help": "http://google.com",
            "success": True,
            "result": {
                "records": [],
                "fields": [{"id": "Name", "type": "text"},
                           {"id": "Range", "type": "text"},
                           {"id": "Frequency", "type": "numeric"}],
                "total": 0,
                "limit": 99999,
            }
        }

        for f in fields:
            if f["id"] == field_id:
                break

        if f["type"] == "numeric":
            c = f["id"]
            temp_data[c] = pd.to_numeric(temp_data[c], errors='coerce')
            # print(temp_data)
            dist = np.histogram(temp_data[c], 11)
            for i in range(0, 11):
                record = {
                    "Name": c,
                    "Range": str(round(dist[1][i])) + " to " + str(round(dist[1][i + 1])),
                    "Frequency": int(dist[0][i])
                }
                results["result"]["records"].append(record)
                record_count += 1

        if f["type"] == "text":
            c = f["id"]
            counts = Counter(temp_data[c])
            for item in counts.most_common(10):
                value = item[0]
                if len(value) > 35:
                    value = value[:35] + "..."
                record = {"Name": c, "Value": value, "Count": item[1]}
                results["result"]["records"].append(record)
                record_count += 1
            results["result"]["fields"] = [{"id": "Name", "type": "text"},
                                           {"id": "Value", "type": "text"},
                                           {"id": "count", "type": "numeric"}]

        results["result"]["total"] = record_count
        results["Access-Control-Allow-Origin"] = "*"
        json_response = json.dumps(results)
        # json_response["Access-Control-Allow-Origin"] = "*"
        # print("Response")
        # print(json_response)
        return json_response
    except Exception as e:
        print("ERROR\n\n\n")
        print(e)
        return json.dumps({'success': False})
def convert_string_to_dict(string):
    # return ast.literal_eval(string)
    return json.loads(string) if string else {}
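A quick check of the empty-input fallback versus real JSON:

print(convert_string_to_dict(''))                 # {}
print(convert_string_to_dict(None))               # {}
print(convert_string_to_dict('{"a": [1, 2]}'))    # {'a': [1, 2]}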
def check_solr_result(context, source_list, limit):
    '''
    Encapsulate the check if a dataset returned by solr really exists in CKAN database
    :param context:
    :param source_list:
    :param limit:
    :return:
    '''
    results = []
    data_source = 'validated_data_dict'
    count = 0
    locale = tk.request.environ['CKAN_LANG'] or config.get('ckan.locale_default', 'en')

    if context.get('for_view', False):
        for package in source_list:
            try:
                package['resources'] = [
                    json.loads(res) for res in package.get('resource_list', []) if res
                ]
            except Exception as e:
                log.error(e.message, e)
            if locale != 'en' and package.get('title_{0}'.format(locale), ''):
                package['title'] = package.get('title_{0}'.format(locale))
            if locale != 'en' and package.get('text_{0}'.format(locale), ''):
                package['notes'] = package.get('text_{0}'.format(locale))
            # if context.get('for_view'):
            #     for item in plugins.PluginImplementations(plugins.IPackageController):
            #         package = item.before_view(package)
            # for resource in package.get('resources', []):
            #     input_list = int(resource.get('download_total_resource','') or '0')
            input_list = [
                int(resource.get('download_total_resource', '') or '0')
                for resource in package.get('resources', [])
            ]
            if not input_list:
                input_list = [0]
            download_count = reduce(lambda x, y: x + y, input_list)
            package['download_total'] = download_count
            results.append(package)
            count += 1
            if count == limit:
                break
    else:
        for package in source_list:
            try:
                package = logic.get_action('package_show')(context, {'id': package['name']})
            except Exception as e:
                log.error(e.message, e)
            results.append(package)
            count += 1
            if count == limit:
                break

    return results
def before_index(self, data_dict):
    value = data_dict.get('keywords', [])
    if value:
        data_dict['keywords_facet'] = json.loads(value)
    return data_dict
def _get_request_data(try_url_params=False):
    u'''Returns a dictionary, extracted from a request.

    If there is no data, None or "" is returned.
    ValueError will be raised if the data is not a JSON-formatted dict.

    The data is retrieved as a JSON-encoded dictionary from the request
    body.  Or, if the `try_url_params` argument is True and the request is
    a GET request, then an attempt is made to read the data from the url
    parameters of the request.

    try_url_params
        If try_url_params is False, then the data_dict is read from the
        request body.

        If try_url_params is True and the request is a GET request then the
        data is read from the url parameters.  The resulting dict will only
        be 1 level deep, with the url-param fields being the keys.  If a
        single key has more than one value specified, then the value will
        be a list of strings, otherwise just a string.
    '''
    def mixed(multi_dict):
        u'''Return a dict with values being lists if they have more than one
        item or a string otherwise
        '''
        out = {}
        for key, value in multi_dict.to_dict(flat=False).items():
            out[key] = value[0] if len(value) == 1 else value
        return out

    if not try_url_params and request.method == u'GET':
        raise ValueError(u'Invalid request. Please use POST method '
                         'for your request')

    request_data = {}
    if request.method in [u'POST', u'PUT'] and request.form:
        values = list(request.form.values())
        if (len(values) == 1 and values[0] in [u'1', u'']):
            try:
                keys = list(request.form.keys())
                request_data = json.loads(keys[0])
            except ValueError as e:
                raise ValueError(
                    u'Error decoding JSON data. '
                    'Error: %r '
                    'JSON data extracted from the request: %r' %
                    (e, request_data))
        else:
            request_data = mixed(request.form)
    elif request.args and try_url_params:
        request_data = mixed(request.args)
    elif (request.data and request.data != u'' and
            request.content_type != u'multipart/form-data'):
        try:
            request_data = request.get_json()
        except BadRequest as e:
            raise ValueError(u'Error decoding JSON data. '
                             'Error: %r '
                             'JSON data extracted from the request: %r' %
                             (e, request_data))
    if not isinstance(request_data, dict):
        raise ValueError(u'Request data JSON decoded to %r but '
                         'it needs to be a dictionary.' % request_data)

    if request.method == u'PUT' and not request_data:
        raise ValueError(u'Invalid request. Please use the POST method for '
                         'your request')
    for field_name, file_ in request.files.items():
        request_data[field_name] = file_
    log.debug(u'Request data extracted: %r', request_data)

    return request_data
def sub_app_get_deserialized(offset):
    res = sub_app_get(offset)
    if res is None:
        return None
    else:
        return json.loads(res)
def resource_view(self, id, resource_id, view_id=None):
    # custom_base.g_analitics()
    '''
    Embedded page for a resource view.

    Depending on the type, different views are loaded. This could be an
    img tag where the image is loaded directly or an iframe that embeds
    a webpage or a recline preview.
    '''
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    try:
        package = get_action('package_show')(context, {'id': id})
    except NotFound:
        abort(404, _('Dataset not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read dataset %s') % id)

    try:
        resource = get_action('resource_show')(context, {'id': resource_id})
        # removes the host to make it relative
        if config.get('ckan.upload_file_url'):
            url = resource['url']
            if config.get('ckan.upload_file_url') in url:
                url = url.split(config.get('ckan.upload_file_url'))
                resource['url'] = url[1]
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % resource_id)

    view = None
    if request.params.get('resource_view', ''):
        try:
            view = json.loads(request.params.get('resource_view', ''))
        except ValueError:
            abort(409, _('Bad resource view data'))
    elif view_id:
        try:
            view = get_action('resource_view_show')(context, {'id': view_id})
        except NotFound:
            abort(404, _('Resource view not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read resource view %s') % view_id)

    if not view or not isinstance(view, dict):
        abort(404, _('Resource view not supplied'))

    analytics_helpers.update_analytics_code_by_organization(package['organization']['id'])
    return h.rendered_resource_view(view, resource, package, embed=True)
# this minimum match is explained
# http://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29
query['mm'] = query.get('mm', '2<-1 5<80%')
query['qf'] = query.get('qf', QUERY_FIELDS)

conn = make_connection()
log.debug('Package query: %r' % query)
try:
    solr_response = conn.raw_query(**query)
except SolrException, e:
    raise SearchError(
        'SOLR returned an error running query: %r Error: %r' %
        (query, e.reason))
try:
    data = json.loads(solr_response)
    response = data['response']
    self.count = response.get('numFound', 0)
    self.results = response.get('docs', [])

    # #1683 Filter out the last row that is sometimes out of order
    self.results = self.results[:rows_to_return]

    # get any extras and add to 'extras' dict
    for result in self.results:
        extra_keys = filter(lambda x: x.startswith('extras_'), result.keys())
        extras = {}
        for extra_key in extra_keys:
            value = result.pop(extra_key)
            extras[extra_key[len('extras_'):]] = value