Python prepare_csv_rows 예제들, tablo.csv_utils.prepare_csv_rows Python 예제들

예제 #1

0

파일 보기

파일: test_csv_utils.py 프로젝트: rafick1983/tablo

    def test_determine_x_and_y_fields(self):
        """Check the ``coord_fields`` entry that ``prepare_csv_rows`` reports for two CSV headers."""
        # Each case pairs a CSV header line with the expected 'coord_fields' value.
        cases = (
            ('lon,lat\n', ('lon', 'lat')),
            ('lon,something_else\n', (None, None)),
        )

        for header_line, expected_fields in cases:
            csv_stream = io.StringIO(header_line + '123.4,50.2\n')
            result = prepare_csv_rows(csv_stream)
            self.assertEqual(result['coord_fields'], expected_fields)

예제 #2

0

파일 보기

파일: api.py 프로젝트: consbio/tablo

    def append(self, request, **kwargs):
        """
        Append the rows of an uploaded temporary CSV file to the table of an existing dataset.

        The POST data must carry two JSON-encoded values: ``csv_info`` (passed on to
        ``populate_point_data``) and ``fields`` (passed on to ``add_or_update_database_fields``).

        :param request:
            The incoming request; it is authenticated before anything else happens.
        :param kwargs:
            Lookup arguments for the temporary file. Must contain ``dataset_id``, which is removed
            before the remaining arguments are used to fetch the file object.
        :return:
            The response created from the bundle, whose data carries the ``table_name`` that was written to.
        :raises ImmediateHttpResponse:
            Wrapping an ``HttpBadRequest`` when an ``InternalError`` is raised while appending.
        """
        self.is_authenticated(request)

        try:
            # Same outcome as reading and then deleting the key: a missing dataset_id raises KeyError.
            dataset_id = kwargs.pop('dataset_id')

            bundle = self.build_bundle(request=request)
            obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

            csv_info = json.loads(request.POST.get('csv_info'))
            additional_fields = json.loads(request.POST.get('fields'))

            row_set = prepare_csv_rows(obj.file)
            sample_row = next(row_set.sample)
            table_name = create_database_table(sample_row, dataset_id, append=True)

            add_or_update_database_fields(table_name, additional_fields)
            populate_data(table_name, row_set)

            bundle.data['table_name'] = table_name

            populate_point_data(dataset_id, csv_info)
            obj.delete()    # Temporary file has been moved to database, safe to delete
        except InternalError:
            # Logger.exception() requires a message; calling it bare raises TypeError inside this
            # handler and hides the original database error from both the log and the client.
            logger.exception('Error appending file to database.')
            raise ImmediateHttpResponse(HttpBadRequest('Error deploying file to database.'))

        return self.create_response(request, bundle)

예제 #3

0

파일 보기

파일: test_csv_utils.py 프로젝트: rafick1983/tablo

 def test_data_type_inference(self):
     """Check the per-column ``data_types`` that ``prepare_csv_rows`` reports for a small CSV."""
     csv_stream = io.StringIO(
         'header_one,header_two,header_three,header_four\n'
         'one,1,,1\n'
         'two,2,,2.1\n')
     inferred_types = prepare_csv_rows(csv_stream)['data_types']

     # Expected lower-cased type name for each column, in column order.
     expected_names = ('string', 'integer', 'empty', 'decimal')
     for position, expected_name in enumerate(expected_names):
         self.assertEqual(inferred_types[position].lower(), expected_name)

예제 #4

0

파일 보기

    def append(self, request, **kwargs):
        """
        Append the rows of an uploaded temporary CSV file to the table of an existing dataset.

        The POST data must carry two JSON-encoded values, ``csv_info`` and ``fields``; both are
        handed to ``create_database_table`` together with the prepared row set.

        :param request:
            The incoming request; it is authenticated before anything else happens.
        :param kwargs:
            Lookup arguments for the temporary file. ``dataset_id`` is removed from them
            (defaulting to ``None`` when absent) before the file object is fetched.
        :return:
            The response created from the bundle, whose data carries the ``table_name``.
        :raises ImmediateHttpResponse:
            Wrapping a JSON ``HttpBadRequest`` built from any exception raised while appending.
        """
        self.is_authenticated(request)

        try:
            dataset_id = kwargs.pop('dataset_id', None)

            bundle = self.build_bundle(request=request)
            lookup_kwargs = self.remove_api_resource_names(kwargs)
            obj = self.obj_get(bundle, **lookup_kwargs)

            csv_info = json.loads(request.POST.get('csv_info'))
            additional_fields = json.loads(request.POST.get('fields'))

            row_set = prepare_csv_rows(obj.file, csv_info)['row_set']
            table_name = create_database_table(
                row_set, csv_info, dataset_id, append=True, additional_fields=additional_fields)
            self.populate_point_data(dataset_id, csv_info)

            bundle.data['table_name'] = table_name

            # Temporary file has been moved to database, safe to delete
            obj.delete()

        except Exception as e:
            logger.exception(e)

            # Report the failure to the client as a JSON-encoded 400 response.
            error_body = json.dumps(derive_error_response_data(e))
            bad_request = HttpBadRequest(content=error_body, content_type='application/json')
            raise ImmediateHttpResponse(bad_request)

        return self.create_response(request, bundle)

예제 #5

0

파일 보기

파일: api.py 프로젝트: consbio/tablo

    def deploy(self, request, **kwargs):
        """
            The deploy endpoint, at ``{tablo_server}/api/v1/temporary-files/{uuid}/{dataset_id}/deploy/`` deploys
            the file specified by {uuid} into a database table named after the {dataset_id}. The {dataset_id} must
            be unique for the instance of Tablo.

            With the deploy endpoint, this is the start of what Tablo considers an import. The data will be
            temporarily stored in an import table until the finalize endpoint for the dataset_id is called.

            POST messages to the deploy endpoint should include the following data:

            **csv_info**
                Information about the CSV file. This is generally that information obtained through the
                describe endpoint, but can be modified to send additional information or modify it.
            **fields**
                A list of field JSON objects in the following format:

                .. code-block:: json

                    {
                        "name": "field_name",
                        "type": "text",
                        "value": "optional value",
                        "required": true
                    }

                The value can be specified if the field is a constant value throughout the table. This can
                be used for adding audit information.

            :param request:
                The incoming request; it is authenticated before anything else happens.
            :param kwargs:
                Lookup arguments for the temporary file. Must contain ``dataset_id``, which is removed
                before the remaining arguments are used to fetch the file object.
            :return:
                The response created from the bundle, whose data carries the ``table_name`` of the created table.
            :raises ImmediateHttpResponse:
                Wrapping an ``HttpBadRequest`` when an ``InternalError`` is raised during the deploy.
        """
        self.is_authenticated(request)

        try:
            # Same outcome as reading and then deleting the key: a missing dataset_id raises KeyError.
            dataset_id = kwargs.pop('dataset_id')

            bundle = self.build_bundle(request=request)
            obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

            csv_info = json.loads(request.POST.get('csv_info'))
            additional_fields = json.loads(request.POST.get('fields'))

            # Use a separate iterator of table rows so as not to exhaust the main one
            optional_fields = determine_optional_fields(prepare_csv_rows(obj.file))

            row_set = prepare_csv_rows(obj.file)
            sample_row = next(row_set.sample)
            table_name = create_database_table(sample_row, dataset_id, optional_fields=optional_fields)
            populate_data(table_name, row_set)

            add_or_update_database_fields(table_name, additional_fields)

            bundle.data['table_name'] = table_name

            add_point_column(dataset_id)

            populate_point_data(dataset_id, csv_info)
            obj.delete()    # Temporary file has been moved to database, safe to delete
        except InternalError:
            # Logger.exception() requires a message; calling it bare raises TypeError inside this
            # handler and hides the original database error from both the log and the client.
            logger.exception('Error deploying file to database.')
            raise ImmediateHttpResponse(HttpBadRequest('Error deploying file to database.'))

        return self.create_response(request, bundle)

예제 #6

0

파일 보기

파일: api.py 프로젝트: consbio/tablo

    def describe(self, request, **kwargs):
        """
        Describe, located at the ``{tablo-server}/api/v1/temporary-files/{uuid}/describe`` endpoint, reports
        the column names and data types found in an uploaded CSV file.

        Only files whose extension is ``csv`` are accepted; any other extension is answered with an
        ``HttpBadRequest``.

        :return:
            A JSON object in the following format:

            .. code-block:: json

                {
                    "fieldNames": ["field one", "field two", "latitude", "longitude"],
                    "dataTypes": ["STRING", "INTEGER", "DOUBLE", "DOUBLE"],
                    "optionalFields": ["field one"],
                    "xColumn": "longitude",
                    "yColumn": "latitude",
                    "file_name": "uploaded.csv"
                }

            **fieldNames**
                The column name of every cell in the sample row.

            **dataTypes**
                The type of every cell in the sample row, rendered as a string with ``TYPE_REGEX`` matches
                removed. The index of this list matches the index of the fieldNames list.

            **optionalFields**
                The result of ``determine_optional_fields`` for the file: fields that had empty values and
                are taken to be optional.

            **xColumn**
                The best guess at which column contains X spatial coordinates. Only present when both an
                X and a Y column were found.

            **yColumn**
                The best guess at which column contains Y spatial coordinates. Only present when both an
                X and a Y column were found.

            **file_name**
                The name of the file being described

        """
        self.is_authenticated(request)

        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

        # Guard clause: anything that is not a CSV is rejected up front.
        if obj.extension != 'csv':
            raise ImmediateHttpResponse(HttpBadRequest('Unsupported file format.'))
        csv_file_name = obj.file.name

        row_set = prepare_csv_rows(obj.file)
        sample_row = next(row_set.sample)

        field_names = [cell.column for cell in sample_row]
        data_types = [TYPE_REGEX.sub('', str(cell.type)) for cell in sample_row]

        bundle.data['fieldNames'] = field_names
        bundle.data['dataTypes'] = data_types
        bundle.data['optionalFields'] = determine_optional_fields(row_set)

        # Coordinate columns are reported only as a pair.
        x_field, y_field = determine_x_and_y_fields(sample_row)
        if x_field and y_field:
            bundle.data['xColumn'] = x_field
            bundle.data['yColumn'] = y_field

        bundle.data['file_name'] = csv_file_name

        return self.create_response(request, bundle)

예제 #7

0

파일 보기

    def deploy(self, request, **kwargs):
        """
            The deploy endpoint, at ``{tablo_server}/api/v1/temporary-files/{uuid}/{dataset_id}/deploy/``, loads
            the file identified by {uuid} into a database table named after the {dataset_id}. The {dataset_id}
            must be unique for the instance of Tablo.

            Deploying is the start of what Tablo considers an import: the data is held in an import table
            until the finalize endpoint for the dataset_id is called.

            POST messages to the deploy endpoint should include the following data:

            **csv_info**
                Information about the CSV file, generally what the describe endpoint returned, optionally
                modified or extended by the caller.
            **fields**
                A list of field JSON objects in the following format:

                .. code-block:: json

                    {
                        "name": "field_name",
                        "type": "text",
                        "required": true
                    }

                The value can be specified if the field is a constant value throughout the table. This can
                be used for adding audit information.

            :return:
                The response created from the bundle, whose data carries the ``table_name`` of the created table.
            :raises ImmediateHttpResponse:
                Wrapping a JSON ``HttpBadRequest`` built from any exception raised during the deploy.
        """
        self.is_authenticated(request)

        try:
            dataset_id = kwargs.pop('dataset_id', None)

            bundle = self.build_bundle(request=request)
            lookup_kwargs = self.remove_api_resource_names(kwargs)
            obj = self.obj_get(bundle, **lookup_kwargs)

            csv_info = json.loads(request.POST.get('csv_info'))
            additional_fields = json.loads(request.POST.get('fields'))

            row_set = prepare_csv_rows(obj.file, csv_info)['row_set']

            table_name = create_database_table(
                row_set, csv_info, dataset_id, additional_fields=additional_fields)
            add_geometry_column(dataset_id)
            self.populate_point_data(dataset_id, csv_info)

            bundle.data['table_name'] = table_name

            # Temporary file has been moved to database, safe to delete
            obj.delete()

        except Exception as e:
            logger.exception(e)

            # Report the failure to the client as a JSON-encoded 400 response.
            error_body = json.dumps(derive_error_response_data(e))
            bad_request = HttpBadRequest(content=error_body, content_type='application/json')
            raise ImmediateHttpResponse(bad_request)

        return self.create_response(request, bundle)

예제 #8

0

파일 보기

    def describe(self, request, **kwargs):
        """
        Describe, located at the ``{tablo-server}/api/v1/temporary-files/{uuid}/describe`` endpoint, reports
        the column names and data types found in an uploaded CSV file.

        An optional JSON-encoded ``csv_info`` value in the POST data is forwarded to ``prepare_csv_rows``;
        a missing or empty value is forwarded as ``None``. Files whose extension is not ``csv`` and files
        that yield no rows are answered with a JSON ``HttpBadRequest`` carrying the ``BAD_DATA`` code.

        :return:
            A JSON object in the following format:

            .. code-block:: json

                {
                    "fieldNames": ["field one", "field two", "latitude", "longitude"],
                    "dataTypes": ["String", "Integer", "Decimal", "Decimal"],
                    "optionalFields": ["field one"],
                    "xColumn": "longitude",
                    "yColumn": "latitude",
                    "file_name": "uploaded.csv"
                }

            **fieldNames**
                A list of field (column) names within the CSV

            **dataTypes**
                A list of data types for each of the columns. The index of this list will match the index of the
                fieldNames list.

            **optionalFields**
                A list of fields that had empty values, and are taken to be optional.

            **xColumn**
                The best guess at which column contains X spatial coordinates. Only present when both an
                X and a Y column were found.

            **yColumn**
                The best guess at which column contains Y spatial coordinates. Only present when both an
                X and a Y column were found.

            **file_name**
                The name of the file being described

        """
        self.is_authenticated(request)

        bundle = self.build_bundle(request=request)
        obj = self.obj_get(bundle, **self.remove_api_resource_names(kwargs))

        try:
            if obj.extension != 'csv':
                raise InvalidFileError('Unsupported file format',
                                       extension=obj.extension)
            csv_file_name = obj.file.name

        except InvalidFileError as e:
            error_body = json.dumps(derive_error_response_data(e, code=BAD_DATA))
            raise ImmediateHttpResponse(
                HttpBadRequest(content=error_body, content_type='application/json'))

        # A missing or empty csv_info decodes to {} and is then normalised to None.
        raw_csv_info = request.POST.get('csv_info')
        csv_info = json.loads(raw_csv_info or '{}') or None

        prepared_csv = prepare_csv_rows(obj.file, csv_info)
        row_set = prepared_csv['row_set']

        if len(row_set) == 0:
            empty_file_error = InvalidFileError('File is empty', lines=0)
            error_body = json.dumps(derive_error_response_data(empty_file_error, code=BAD_DATA))
            raise ImmediateHttpResponse(
                HttpBadRequest(content=error_body, content_type='application/json'))

        bundle.data['fieldNames'] = row_set.columns.to_list()
        bundle.data['dataTypes'] = prepared_csv['data_types']
        bundle.data['optionalFields'] = prepared_csv['optional_fields']

        # Coordinate columns are reported only as a pair.
        x_field, y_field = prepared_csv['coord_fields']
        if x_field and y_field:
            bundle.data['xColumn'] = x_field
            bundle.data['yColumn'] = y_field

        bundle.data['file_name'] = csv_file_name

        return self.create_response(request, bundle)