Example #1
def _convert_to_tdf_resource(resource_dict, pkg_zipstream=None):
    '''Convert a CKAN resource dict into a Tabular Data Format resource dict.

    :param pkg_zipstream: If given and if the resource has a file uploaded to
        the FileStore, then the file will be written to the zipstream and the
        returned dict will contain "path" instead of "url".
    :type pkg_zipstream: zipstream.ZipFile

    '''
    if pkg_zipstream and resource_dict.get('url_type') == 'upload':

        name = resource_dict.get('name')
        if not name:
            # FIXME: Need to generate unique names (unique within the
            # package) for unnamed files.
            name = toolkit._('Unnamed file')
        resource = {'path': name}

        # Add the resource file itself into the ZIP file.
        pkg_zipstream.write(util.get_path_to_resource_file(resource_dict),
                            arcname=resource['path'])

    else:
        resource = {'url': resource_dict['url']}

    try:
        schema_string = resource_dict.get('schema', '')
        resource['schema'] = h.json.loads(schema_string)
    except ValueError:
        pass
    return resource
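
A minimal usage sketch, not taken from the listings above: export_package and the descriptor keys are illustrative names, and zipstream refers to the python-zipstream package that provides the zipstream.ZipFile mentioned in the docstring.

import zipstream


def export_package(pkg_dict):
    # Illustrative wrapper: stream each uploaded resource file into a ZIP
    # archive while building a minimal Tabular Data Format descriptor.
    pkg_zipstream = zipstream.ZipFile(mode='w',
                                      compression=zipstream.ZIP_DEFLATED)
    resources = [_convert_to_tdf_resource(res, pkg_zipstream)
                 for res in pkg_dict.get('resources', [])]
    descriptor = {'name': pkg_dict['name'], 'resources': resources}
    return descriptor, pkg_zipstream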
Example #3
def csv_data(resource):
    '''Return the CSV data for the given resource.

    '''
    try:
        path = util.get_path_to_resource_file(resource)
    except exceptions.ResourceFileDoesNotExistException:
        return {'success': False,
                'error': toolkit._("There's no uploaded file for this "
                                   "resource")}
    return _csv_data_from_file(open(path))
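
A short sketch of the calling side, assuming the standard CKAN plugins toolkit: resource_show is CKAN's built-in action for fetching a resource dict, and csv_preview is an illustrative name, not part of the extension.

def csv_preview(context, resource_id):
    # Illustrative helper: look the resource up via the core resource_show
    # action, then return its parsed CSV data, or the error dict that
    # csv_data() returns when the resource has no uploaded file.
    resource = toolkit.get_action('resource_show')(context,
                                                   {'id': resource_id})
    return csv_data(resource)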
Example #5
    def test_get_path_to_resource_file_with_uploaded_file(self):

        user = factories.User()
        package = factories.Dataset(user=user)
        api = ckanapi.TestAppCKAN(self.app, apikey=user['apikey'])
        csv_file = custom_helpers.get_csv_file('test-data/datetimes.csv')
        resource = api.action.resource_create(package_id=package['id'],
                                              upload=csv_file)

        path = util.get_path_to_resource_file(resource)

        # Check that the path is correct by comparing the contents of the
        # uploaded copy of the file to the original file.
        assert open(path).read() == (
            open(os.path.join(os.path.split(__file__)[0],
                '../test-data/datetimes.csv')).read())
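
A hedged companion test, not taken from the source: it assumes, as the csv_data() example above implies, that util.get_path_to_resource_file() raises ResourceFileDoesNotExistException for a resource that only links to an external URL.

    def test_get_path_to_resource_file_with_linked_resource(self):

        user = factories.User()
        package = factories.Dataset(user=user)
        # A resource created with a plain URL has no uploaded file in the
        # FileStore (assumed behaviour, mirroring csv_data() above).
        resource = factories.Resource(package_id=package['id'],
                                      url='http://example.com/data.csv')

        try:
            util.get_path_to_resource_file(resource)
            assert False, 'ResourceFileDoesNotExistException expected'
        except exceptions.ResourceFileDoesNotExistException:
            pass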
Example #7
def _infer_schema_for_resource(resource):
    '''Return a JSON Table Schema for the given resource.

    This will guess column headers and types from the resource's CSV file.

    '''
    # Note: Since this function is only called after uploading a file,
    # we assume the resource does have an uploaded file and this line will not
    # raise an exception.
    path = util.get_path_to_resource_file(resource)

    if not csv_utils.resource_is_csv_or_text_file(path):
        helpers.flash_notice(
            'This file does not seem to be a csv or text file. '
            'You could try validating this file at http://csvlint.io'
        )

    try:
        schema = csv_utils.infer_schema_from_csv_file(path)
    except exceptions.CouldNotReadCSVException:
        schema = {'fields': []}

    return schema
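
For orientation, a JSON Table Schema lists one entry per CSV column; the sketch below is illustrative, not from the extension, and stores the inferred schema back on the resource dict as a JSON string, the inverse of the h.json.loads() call in Example #1.

import json


def _update_resource_schema(resource_dict):
    # Illustrative helper: infer a schema for the resource's uploaded CSV
    # file and store it as a JSON string, e.g.
    #   {'fields': [{'name': 'date', 'type': 'date'},
    #               {'name': 'temperature', 'type': 'number'}]}
    schema = _infer_schema_from_resource = _infer_schema_for_resource(resource_dict)
    resource_dict['schema'] = json.dumps(schema)
    return resource_dict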
Example #8

def resource_schema_field_create(context, data_dict):
    # Signature assumed: the original listing starts mid-function, and the
    # validation schema used below points to the resource_schema_field_create
    # action.
    try:
        data_dict, errors = dictization_functions.validate(data_dict,
            schema.resource_schema_field_create_schema(), context)
    except exceptions.InvalidResourceIDException as e:
        raise toolkit.ValidationError(e)
    if errors:
        raise toolkit.ValidationError(errors)

    resource_id = data_dict.pop('resource_id')

    resource_dict = toolkit.get_action('resource_show')(context,
        {'id': resource_id})

    if data_dict.get('type') in ('date', 'time', 'datetime'):

        try:
            path = util.get_path_to_resource_file(resource_dict)
        except exceptions.ResourceFileDoesNotExistException:
            path = None

        if path:
            try:
                data_dict['temporal_extent'] = csv_utils.temporal_extent(path,
                                                 column_num=data_dict['index'])
            except (ValueError, TypeError):
                pass

    schema_ = toolkit.get_action('resource_schema_show')(context,
        {'resource_id': resource_id})
    schema_['fields'].append(data_dict)