def proxy_query(resource, url, query):
    '''
    Validate a data proxy request and look up a transformer for it.

    The resource type is taken from the ``type`` entry of ``query`` when
    that is set, otherwise from the file extension of the URL path. It is
    lower-cased and a leading dot is stripped before it is handed to
    ``transformer``.

    :param resource: passed through unchanged to ``transformer``
    :param url: URL of the resource; its scheme must be http or https
    :param query: dict of request options. Only ``type`` is read here; the
        whole dict is also passed on to ``transformer``.
    :raises ResourceError: if the URL scheme is not http or https
    :raises RequestError: if no resource type can be determined, or if
        ``transformer`` raises for the resolved type
    '''
    parts = urlparse.urlparse(url)

    if parts.scheme not in ['http', 'https']:
        raise ResourceError(
            'Only HTTP(S) URLs are supported',
            'Data proxy does not support %s URLs' % parts.scheme)

    # Get resource type - first try to see whether there is type= URL option,
    # if there is not, try to get it from file extension
    resource_type = query.get("type")
    if not resource_type:
        resource_type = os.path.splitext(parts.path)[1]

    if not resource_type:
        raise RequestError(
            'Could not determine the resource type',
            'If file has no type extension, specify file type in type= option')

    # Normalise to lower case without the leading dot of a file extension.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
    except Exception as e:
        # Raise rather than return: a caller must never receive the
        # exception object as if it were a normal result.
        raise RequestError(
            'Resource type not supported',
            'Transformation of resource of type %s is not supported. '
            'Reason: %s' % (resource_type, e))
    # NOTE(review): ``trans`` is not used in the part of the function that
    # is visible here - confirm whether further processing is missing.
def proxy_query(resource, url, query):
    '''
    Check a data proxy request and obtain the transformer for its resource.

    Only http and https URLs are accepted. The resource type comes from
    ``query["type"]`` if set, else from the extension of the URL path; it
    is lower-cased and a leading dot is removed before use.

    :param resource: handed unchanged to ``transformer``
    :param url: URL of the resource (http or https)
    :param query: dict of request options; ``type`` is read here and the
        dict as a whole is forwarded to ``transformer``
    :raises ResourceError: when the URL scheme is neither http nor https
    :raises RequestError: when the resource type cannot be determined, or
        when ``transformer`` raises for that type
    '''
    parts = urlparse.urlparse(url)

    if parts.scheme not in ['http', 'https']:
        raise ResourceError('Only HTTP(S) URLs are supported',
                            'Data proxy does not support %s URLs'
                            % parts.scheme)

    # Get resource type - first try to see whether there is type= URL option,
    # if there is not, try to get it from file extension
    resource_type = query.get("type")
    if not resource_type:
        resource_type = os.path.splitext(parts.path)[1]

    if not resource_type:
        raise RequestError('Could not determine the resource type',
                           'If file has no type extension, specify file '
                           'type in type= option')

    # Lower-case the type and drop the leading dot of a file extension.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
    except Exception as e:
        # The failure is raised, not returned, so that callers cannot
        # mistake the exception object for a successful result.
        raise RequestError('Resource type not supported',
                           'Transformation of resource of type %s is not '
                           'supported. Reason: %s' % (resource_type, e))
    # NOTE(review): ``trans`` is not used in the part of the function that
    # is visible here - confirm whether further processing is missing.
def proxy_query(resource, url, query):
    '''
    Given the URL for a data file, return its transformed contents in JSON
    form.

    For example, for a spreadsheet the documented result is a JSON dict:

        {
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }

    Whatever the format, the result always has length (file size in bytes)
    and url (where the data came from, which might be a URL or a local
    cache filepath).

    NOTE(review): the code visible here ends once the transformer has been
    obtained; the step that builds the result is not shown - confirm.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL:
          type - (optional) format of the file - extension or mimetype.
                 Only specify this if you, the caller, know better than
                 magic can detect it. Defaults to the file extension of
                 the URL.
          length - (optional) size of the file. If not supplied, it will
                 be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON
                 result
        A 'handler' entry is written into this dict: _open_url for http(s)
        URLs, _open_file for anything else.
    '''
    parsed = urlparse.urlparse(url)

    # Anything that is not http(s) is opened as a local file.
    is_remote = parsed.scheme in ('http', 'https')
    query['handler'] = _open_url if is_remote else _open_file

    # An explicit type= option wins; otherwise fall back to the extension.
    resource_type = query.get("type") or os.path.splitext(parsed.path)[1]
    if not resource_type:
        raise RequestError(
            'Could not determine the resource type',
            'If file has no type extension, specify file type in type= option')

    # Normalise to lower case without the leading dot of an extension.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # Routed through the handler below, like any lookup failure.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError(
            'Resource type not supported',
            'Transformation of resource of type %s is not supported.'
            % resource_type)
def proxy_query(resource, url, query):
    '''
    Turn the data file at a URL into its transformed JSON representation.

    A spreadsheet, for instance, is documented to come back as a JSON dict:

        {
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }

    Every result has length (file size in bytes) and url (the source of
    the data, which might be a URL or a local cache filepath).

    NOTE(review): the code visible here ends once the transformer has been
    obtained; the step that builds the result is not shown - confirm.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL:
          type - (optional) format of the file - extension or mimetype.
                 Only specify this if you, the caller, know better than
                 magic can detect it. Defaults to the file extension of
                 the URL.
          length - (optional) size of the file. If not supplied, it will
                 be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON
                 result
        This function stores the opener to use under the 'handler' key of
        this dict (_open_url for http/https, otherwise _open_file).
    '''
    url_parts = urlparse.urlparse(url)

    # Pick the opener from the scheme: http(s) is fetched, the rest is
    # treated as a local file.
    if url_parts.scheme in ('http', 'https'):
        opener = _open_url
    else:
        opener = _open_file
    query['handler'] = opener

    # Get resource type - prefer the type= option, then the file extension.
    resource_type = query.get("type")
    if not resource_type:
        extension = os.path.splitext(url_parts.path)[1]
        if not extension:
            raise RequestError('Could not determine the resource type',
                               'If file has no type extension, specify file '
                               'type in type= option')
        resource_type = extension

    # Lower-case the type and strip the leading dot of an extension.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # Handled by the except clause below, same as a lookup error.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError('Resource type not supported',
                           'Transformation of resource of type %s is not '
                           'supported.' % resource_type)
def proxy_query(resource, url, query):
    '''
    Given the URL for a data file, return its transformed contents in JSON
    form.

    For example, for a spreadsheet the documented result is a JSON dict:

        {
            "archived": "This file is previewed from the data.gov.uk archive.",
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "extra_text": "This preview shows only the first 10 rows",
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }

    Whatever the format, the result always has length (file size in bytes)
    and url (where the data came from, which might be a URL or a local
    cache filepath).

    Alternatively the result is documented to be an error message:

        {
            "error": {
                "message": "Requested resource is 21.3MB. Size limit is 19MB. Resource: /dataset/your-freedom-data/resource/ea11ed1e-d793-4fc6-b150-fb362a7ccac9",
                "title": "The requested file is too large to preview"
            }
        }

    NOTE(review): the code visible here ends once the transformer has been
    obtained; the step that builds either result is not shown - confirm.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL:
          type - (optional) format of the file - extension or mimetype.
                 Only specify this if you, the caller, know better than
                 magic can detect it. Defaults to the file extension of
                 the URL.
          length - (optional) size of the file. If not supplied, it will
                 be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON
                 result
        A 'handler' entry is written into this dict: _open_url for http(s)
        URLs, _open_file for anything else.
    '''
    parsed = urlparse.urlparse(url)

    # Anything that is not http(s) is opened as a local file.
    fetch_remotely = parsed.scheme in ('http', 'https')
    query['handler'] = _open_url if fetch_remotely else _open_file

    # An explicit type= option wins; otherwise fall back to the extension.
    resource_type = query.get("type") or os.path.splitext(parsed.path)[1]
    if not resource_type:
        raise RequestError('Could not determine the resource type',
                           'If file has no type extension, specify file '
                           'type in type= option')

    # Normalise: lower case, no leading dot, and json-stat handled as json.
    normalised = re.sub(r'^\.', '', resource_type.lower())
    resource_type = normalised.replace('json-stat', 'json')

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # Routed through the handler below, like any lookup failure.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError('Resource type not supported',
                           'Transformation of resource of type %s is not '
                           'supported.' % resource_type)