Example #1
0
def proxy_query(resource, url, query):
    '''
    Validate a proxy request and look up the transformer for its resource
    type.

    The resource type is taken from the ``type`` entry of ``query`` when it
    is present and non-empty, otherwise from the file extension of the URL
    path. It is lower-cased and a single leading dot is stripped before it
    is handed to ``transformer``.

    :param resource: passed through unchanged to ``transformer``
    :param url: URL of the resource; only http and https are accepted
    :param query: dict-like request options:
          type - (optional) resource type, overrides the URL extension

    Raises ResourceError if the URL scheme is not http or https.
    Raises RequestError if no resource type can be determined, or if
    ``transformer`` raises for the determined type.
    '''
    parts = urlparse.urlparse(url)

    if parts.scheme not in ['http', 'https']:
        raise ResourceError(
            'Only HTTP(S) URLs are supported',
            'Data proxy does not support %s URLs' % parts.scheme)

    # Get resource type - first try to see whether there is type= URL option,
    # if there is not, try to get it from file extension
    resource_type = query.get("type")
    if not resource_type:
        resource_type = os.path.splitext(parts.path)[1]

    if not resource_type:
        raise RequestError(
            'Could not determine the resource type',
            'If file has no type extension, specify file type in type= option')

    # splitext() keeps the leading dot ('.csv'); normalise to 'csv'.
    resource_type = re.sub(r'^\.', '', resource_type.lower())
    try:
        # NOTE(review): trans is not used in the visible part of this function.
        trans = transformer(resource_type, resource, url, query)
    except Exception as e:
        # Report the failure as a RequestError instead of returning the
        # exception object to the caller as if it were a normal result.
        raise RequestError(
            'Resource type not supported',
            'Transformation of resource of type %s is not supported. Reason: %s'
            % (resource_type, e))
Example #2
0
def proxy_query(resource, url, query):
    '''
    Check that a proxy request can be served and obtain its transformer.

    The resource type comes from ``query["type"]`` if that is set and
    non-empty; otherwise it is the file extension of the URL path. The type
    is lower-cased and one leading dot is removed before it is passed to
    ``transformer``.

    :param resource: forwarded as-is to ``transformer``
    :param url: URL of the resource; the scheme must be http or https
    :param query: dict-like request options:
          type - (optional) resource type, takes precedence over the
                            extension found in the URL

    Raises ResourceError when the URL scheme is neither http nor https.
    Raises RequestError when the resource type cannot be determined or when
    ``transformer`` raises for that type.
    '''
    parts = urlparse.urlparse(url)

    if parts.scheme not in ['http', 'https']:
        raise ResourceError('Only HTTP(S) URLs are supported',
                            'Data proxy does not support %s URLs' % parts.scheme)

    # Get resource type - first try to see whether there is type= URL option,
    # if there is not, try to get it from file extension
    resource_type = query.get("type")
    if not resource_type:
        resource_type = os.path.splitext(parts.path)[1]

    if not resource_type:
        raise RequestError('Could not determine the resource type',
                            'If file has no type extension, specify file type in type= option')

    # The extension from splitext() starts with a dot; drop it.
    resource_type = re.sub(r'^\.', '', resource_type.lower())
    try:
        # NOTE(review): trans is not used in the visible part of this function.
        trans = transformer(resource_type, resource, url, query)
    except Exception as e:
        # Raise rather than return the exception: a returned exception
        # object would look like a successful result to the caller.
        raise RequestError('Resource type not supported',
                            'Transformation of resource of type %s is not supported. Reason: %s'
                              % (resource_type, e))
Example #3
0
def proxy_query(resource, url, query):
    '''
    Transform the data file found at a URL and return its contents as JSON.

    For a spreadsheet, for example, the result is a JSON dict such as:
        {
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }
    The result always carries length (file size in bytes) and url (where the
    data was read from - either a URL or a local cache filepath).

    NOTE(review): the visible part of this function stops after the
    transformer lookup; the return value described above is taken from the
    original documentation.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL. A 'handler' entry is written into it
                  by this function (_open_url for http/https URLs,
                  _open_file for anything else). Recognised entries:
          type - (optional) format of the file - extension or mimetype.
                            Only specify this if the caller knows better
                            than magic can detect it.
                            Defaults to the file extension of the URL.
          length - (optional) size of the file. If not supplied,
                              it will be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON result
    '''
    parsed = urlparse.urlparse(url)

    # HTTP(S) URLs are opened remotely; everything else is treated as a file.
    if parsed.scheme in ('http', 'https'):
        query['handler'] = _open_url
    else:
        query['handler'] = _open_file

    # An explicit type= option wins; otherwise use the URL's file extension.
    resource_type = query.get("type") or os.path.splitext(parsed.path)[1]
    if not resource_type:
        raise RequestError(
            'Could not determine the resource type',
            'If file has no type extension, specify file type in type= option')

    # Normalise: lower-case and without the leading dot of an extension.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # Treated like any other lookup failure by the handler below.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError(
            'Resource type not supported',
            'Transformation of resource of type %s is not supported.' %
            (resource_type))
def proxy_query(resource, url, query):
    '''
    Return the transformed contents of the data file at ``url`` in JSON form.

    e.g. for a spreadsheet the JSON dict looks like:
        {
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }
    Regardless of file type the result has length (file size in bytes) and
    url (the place the data came from, which may be a URL or a local cache
    filepath).

    NOTE(review): the code visible here ends once the transformer has been
    looked up; the result shape above comes from the original documentation.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL; this function stores the opener to use
                  under its 'handler' key. Entries read by the proxy:
          type - (optional) format of the file - extension or mimetype.
                            Only specify this if the caller knows better
                            than magic can detect it.
                            Defaults to the file extension of the URL.
          length - (optional) size of the file. If not supplied,
                              it will be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON result
    '''
    url_parts = urlparse.urlparse(url)

    # Pick the opener: remote fetch for http/https, local file otherwise.
    is_remote = url_parts.scheme in ['http', 'https']
    query['handler'] = _open_url if is_remote else _open_file

    # Get resource type - the type= option first, then the file extension.
    resource_type = query.get("type")
    if not resource_type:
        extension = os.path.splitext(url_parts.path)[1]
        if not extension:
            raise RequestError('Could not determine the resource type',
                'If file has no type extension, specify file type in type= option')
        resource_type = extension

    # Lower-case the type and strip the dot that an extension starts with.
    resource_type = re.sub(r'^\.', '', resource_type.lower())

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # A falsy transformer is reported the same way as a failure.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError('Resource type not supported',
            'Transformation of resource of type %s is not supported.'
            % (resource_type))
Example #5
0
def proxy_query(resource, url, query):
    '''
    Take the URL of a data file and return its transformed contents as JSON.

    e.g. for a spreadsheet, the JSON dict returned is:
        {
            "archived": "This file is previewed from the data.gov.uk archive.",
            "fields": ['Name', 'Age'],
            "data": [['Bob', 42], ['Jill', 54]],
            "extra_text": "This preview shows only the first 10 rows",
            "max_results": 10,
            "length": 435,
            "url": "http://data.com/file.csv",
        }
    In every case it has length (file size in bytes) and url (the source of
    the data, which might be a URL or a local cache filepath).

    Alternatively it is an error message:
        {
            "error": {
                "message": "Requested resource is 21.3MB. Size limit is  19MB. Resource: /dataset/your-freedom-data/resource/ea11ed1e-d793-4fc6-b150-fb362a7ccac9",
                "title": "The requested file is too large to preview"
            }
        }

    NOTE(review): the code visible here ends after the transformer lookup;
    the result shapes above are taken from the original documentation.

    May raise RequestError.

    :param resource: resource object
    :param url: URL or local filepath
    :param query: dict about the URL; the opener to use is written to its
                  'handler' key by this function. Entries:
          type - (optional) format of the file - extension or mimetype.
                            Only specify this if the caller knows better
                            than magic can detect it.
                            Defaults to the file extension of the URL.
          length - (optional) size of the file. If not supplied,
                              it will be determined.
          size_limit - max size of the file to transform
          indent - (optional) the indent used to pretty-print the JSON result
    '''
    location = urlparse.urlparse(url)

    # Non-HTTP(S) locations are read as local files.
    if location.scheme in ('http', 'https'):
        opener = _open_url
    else:
        opener = _open_file
    query['handler'] = opener

    # Resource type: explicit type= option, else the URL's file extension.
    resource_type = query.get("type") or os.path.splitext(location.path)[1]
    if not resource_type:
        raise RequestError('Could not determine the resource type',
            'If file has no type extension, specify file type in type= option')

    # Normalise the type: lower-case, no leading dot, and any 'json-stat'
    # substring rewritten to 'json'.
    resource_type = re.sub(r'^\.', '', resource_type.lower())
    resource_type = resource_type.replace('json-stat', 'json')

    try:
        trans = transformer(resource_type, resource, url, query)
        if not trans:
            # Funnel "no transformer" into the same error path as a failure.
            raise Exception("No transformer for %s" % resource_type)
    except Exception:
        raise RequestError('Resource type not supported',
            'Transformation of resource of type %s is not supported.'
            % (resource_type))