Example #1
0
    def import_data(self, user, upload, external_id_field_index=None):
        """
        Import data into this ``Dataset`` from a given ``DataUpload``.

        Locks the dataset for the duration of the setup. The lock is
        released only if setup fails; on success the async import task
        is expected to unlock when it finishes.

        :param user: the user initiating the import (recorded as the
            creator of the ``TaskStatus``).
        :param upload: the ``DataUpload`` to import from.
        :param external_id_field_index: optional column index to use as
            the external ID; passed through to the import task.
        :raises DataImportError: if the upload was already imported, its
            file type is unsupported, or its columns do not match the
            dataset's existing schema.
        """
        self.lock()

        try:
            if upload.imported:
                raise DataImportError('This file has already been imported.')

            task_type = get_import_task_type_for_upload(upload)

            if not task_type:
                # This is normally caught on the client.
                raise DataImportError('This file type is not supported for data import.')

            if self.column_schema:
                # This is normally caught on the client.
                if upload.columns != [c['name'] for c in self.column_schema]:
                    raise DataImportError('The columns in this file do not match those in the dataset.')
            else:
                # First import: derive the schema by pairing each column
                # name with its guessed type (zip replaces the manual
                # enumerate-and-index loop).
                self.column_schema = [{
                    'name': name,
                    'indexed': False,
                    'type': guessed_type,
                    'indexed_name': None,
                    'min': None,
                    'max': None
                } for name, guessed_type in zip(upload.columns, upload.guessed_types)]

            if self.sample_data is None:
                self.sample_data = upload.sample_data

            # If this is the first import and the API hasn't been used, save that information
            if self.initial_upload is None and self.row_count is None:
                self.initial_upload = upload

            self.current_task = TaskStatus.objects.create(task_name=task_type.name, creator=user)
            self.save()

            task_type.apply_async(
                args=[self.slug, upload.id],
                kwargs={ 'external_id_field_index': external_id_field_index },
                task_id=self.current_task.id
            )
        except:
            # Release the lock on any failure, then re-raise so callers
            # still see the original error.
            self.unlock()
            raise
Example #2
0
    def import_data(self, user, upload, external_id_field_index=None):
        """
        Import data into this ``Dataset`` from a given ``DataUpload``.

        Validates the upload against the dataset, records or creates the
        column schema, then dispatches the asynchronous import task. The
        dataset stays locked unless setup fails.
        """
        self.lock()

        try:
            if upload.imported:
                raise DataImportError(
                    _('This file has already been imported.'))

            task_type = get_import_task_type_for_upload(upload)

            if not task_type:
                # This is normally caught on the client.
                raise DataImportError(
                    _('This file type is not supported for data import.'))

            if not self.column_schema:
                # First import: build the schema from the upload.
                self.column_schema = make_column_schema(
                    upload.columns, types=upload.guessed_types)
            else:
                # This is normally caught on the client.
                existing_names = [column['name'] for column in self.column_schema]

                if upload.columns != existing_names:
                    raise DataImportError(
                        _('The columns in this file do not match those in the dataset.'
                          ))

            if self.sample_data is None:
                self.sample_data = upload.sample_data

            # If this is the first import and the API hasn't been used, save that information
            if self.initial_upload is None and self.row_count is None:
                self.initial_upload = upload

            description = _('Import data from %(filename)s into %(slug)s.') % {
                'filename': upload.filename,
                'slug': self.slug
            }

            self.current_task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=description,
                creator=user
            )
            self.save()

            task_type.apply_async(
                args=[self.slug, upload.id],
                kwargs={'external_id_field_index': external_id_field_index},
                task_id=self.current_task.id)
        except:
            # Unlock on any failure and propagate the error.
            self.unlock()
            raise
Example #3
0
    def import_data(self, user, upload, external_id_field_index=None, schema_overrides=None):
        """
        Import data into this ``Dataset`` from a given ``DataUpload``.

        Locks the dataset during setup; the lock is released only when
        setup fails (the dispatched import task handles the success
        path).

        :param user: the user initiating the import (recorded as the
            creator of the ``TaskStatus``).
        :param upload: the ``DataUpload`` to import from.
        :param external_id_field_index: optional column index to use as
            the external ID; passed through to the import task.
        :param schema_overrides: optional per-column overrides passed to
            ``make_column_schema``. Defaults to an empty dict.
        :raises DataImportError: if the upload was already imported, its
            file type is unsupported, or its columns do not match the
            dataset's existing schema.
        """
        # Use a None sentinel instead of a mutable `{}` default — a dict
        # default would be shared across all calls to this method.
        if schema_overrides is None:
            schema_overrides = {}

        self.lock()

        try:
            if upload.imported:
                raise DataImportError(_('This file has already been imported.'))

            task_type = get_import_task_type_for_upload(upload)

            if not task_type:
                # This is normally caught on the client.
                raise DataImportError(_('This file type is not supported for data import.'))

            if self.column_schema:
                # This is normally caught on the client.
                if upload.columns != [c['name'] for c in self.column_schema]:
                    raise DataImportError(_('The columns in this file do not match those in the dataset.'))
            else:
                self.column_schema = make_column_schema(upload.columns, types=upload.guessed_types, overrides=schema_overrides)

            if self.sample_data is None:
                self.sample_data = upload.sample_data

            # If this is the first import and the API hasn't been used, save that information
            if self.initial_upload is None and self.row_count is None:
                self.initial_upload = upload

            self.current_task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=_('Import data from %(filename)s into %(slug)s.') \
                    % {'filename': upload.filename, 'slug': self.slug},
                creator=user
            )
            self.save()

            task_type.apply_async(
                args=[self.slug, upload.id],
                kwargs={ 'external_id_field_index': external_id_field_index },
                task_id=self.current_task.id
            )
        except:
            # Release the lock on any failure and re-raise.
            self.unlock()
            raise
Example #4
0
File: dataset.py Project: eob/panda
    def import_data(self, user, upload, external_id_field_index=None):
        """
        Import data into this ``Dataset`` from a given ``DataUpload``.

        Validates the upload, records the dataset's columns and sample
        data on first import, then dispatches the asynchronous import
        task.

        :param user: the user initiating the import (recorded as the
            creator of the ``TaskStatus``).
        :param upload: the ``DataUpload`` to import from.
        :param external_id_field_index: optional column index to use as
            the external ID; passed through to the import task.
        :raises DataImportError: if the upload was already imported, its
            file type is unsupported, or its columns do not match the
            dataset's existing columns.
        """
        if upload.imported:
            raise DataImportError('This file has already been imported.')

        task_type = get_import_task_type_for_upload(upload)

        if not task_type:
            # This is normally caught on the client.
            raise DataImportError('This file type is not supported for data import.')

        if self.columns:
            # This is normally caught on the client.
            if upload.columns != self.columns:
                raise DataImportError('The columns in this file do not match those in the dataset.')
        else:
            self.columns = upload.columns

        if self.sample_data is None:
            self.sample_data = upload.sample_data
        # TODO: extend existing sample data on subsequent imports?
        # (The dead `else: pass` branch that held this TODO was removed.)

        # If this is the first import and the API hasn't been used, save that information
        if self.initial_upload is None and self.row_count is None:
            self.initial_upload = upload

        self.current_task = TaskStatus.objects.create(task_name=task_type.name, creator=user)
        self.save()

        task_type.apply_async(
            args=[self.slug, upload.id],
            kwargs={ 'external_id_field_index': external_id_field_index },
            task_id=self.current_task.id
        )