Beispiel #1
0
    def list(self, request, *args, **kwargs):

        # get schema, table and column_names from the querystring
        schema_name = self.request.GET.get('schema')
        table_name = self.request.GET.get('table')
        column_names = self.request.GET.getlist('column')

        # get the columns which the user is allowed to access
        user_columns = get_user_columns(self.request.user, schema_name, table_name)

        if user_columns:
            # get the row query params from the request
            ordering, page, page_size, search, filters = self._get_query_params(user_columns)

            # filter by input column names by the the allowed columns
            if column_names:
                column_names = [column.name for column in user_columns if column.name in column_names]
            else:
                column_names = [column.name for column in user_columns]

            # get database adapter
            adapter = DatabaseAdapter()

            # query the database for the total number of rows
            count = adapter.count_rows(schema_name, table_name, column_names, search, filters)

            # query the paginated rowset
            results = adapter.fetch_rows(schema_name, table_name, column_names, ordering, page, page_size, search, filters)

            # return ordered dict to be send as json
            return Response(OrderedDict((
                ('count', count),
                ('results', fix_for_json(results)),
                ('next', self._get_next_url(page, page_size, count)),
                ('previous', self._get_previous_url(page))
            )))

        # if nothing worked, return 404
        raise NotFound()
Beispiel #2
0
    def rows(self, column_names, ordering, page, page_size, search, filters):
        if self.phase == self.PHASE_COMPLETED:
            # check if the columns are actually in the jobs table
            errors = {}

            for column_name in column_names:
                if column_name not in self.column_names:
                    errors[column_name] = _('Column not found.')

            if errors:
                raise ValidationError(errors)

            # get database adapter
            adapter = DatabaseAdapter()

            try:
                # query the database for the total number of rows
                count = adapter.count_rows(self.schema_name, self.table_name,
                                           column_names, search, filters)

                # query the paginated rowset
                rows = adapter.fetch_rows(self.schema_name, self.table_name,
                                          column_names, ordering, page,
                                          page_size, search, filters)

                # flatten the list if only one column is retrieved
                if len(column_names) == 1:
                    return count, [element for row in rows for element in row]
                else:
                    return count, rows

            except ProgrammingError:
                return 0, []

        else:
            raise ValidationError({'phase': ['Job is not COMPLETED.']})
Beispiel #3
0
    def list(self, request, *args, **kwargs):
        # get the row query params from the request
        ordering, page, page_size, search, filters = self._get_query_params(
            settings.ARCHIVE_COLUMNS)

        # get database adapter
        adapter = DatabaseAdapter()

        # get the schema_name and the table_name from the settings
        schema_name = settings.ARCHIVE_SCHEMA
        table_name = settings.ARCHIVE_TABLE

        # get collecions for this user and add them to the filters
        collections = [
            collection.name for collection in
            Collection.objects.filter_by_access_level(request.user)
        ]
        filters['collection'] = collections

        # get the name of the columns
        column_names = [column['name'] for column in settings.ARCHIVE_COLUMNS]

        # query the database for the total number of rows
        count = adapter.count_rows(schema_name, table_name, column_names,
                                   search, filters)

        # query the paginated rowset
        results = adapter.fetch_rows(schema_name, table_name, column_names,
                                     ordering, page, page_size, search,
                                     filters)

        # return ordered dict to be send as json
        return Response(
            OrderedDict((('count', count), ('results', results),
                         ('next', self._get_next_url(page, page_size, count)),
                         ('previous', self._get_previous_url(page)))))
Beispiel #4
0
def run_database_query_task(job_id):
    # always import daiquiri packages inside the task
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.query.utils import get_quota, get_job_sources, get_job_columns, ingest_uploads
    from daiquiri.stats.models import Record

    # get logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the database of the user if it not already exists
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()

            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(
                _('Quota is exceeded. Please remove some of your jobs.'))
            job.save()

            return job.phase

        # set database and start time
        job.pid = adapter.fetch_pid()
        job.actual_query = adapter.build_query(job.schema_name, job.table_name,
                                               job.native_query, job.timeout,
                                               job.max_records)
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # get the actual query and submit the job to the database
        try:
            ingest_uploads(job.uploads, job.owner)

            # this is where the work ist done (and the time is spend)
            adapter.submit_query(job.actual_query)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # get additional information about the completed job
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # get timing and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # fetch the metadata for used tables
                job.metadata['sources'] = get_job_sources(job)

                # fetch the metadata for the columns and fetch additional metadata from the metadata store
                job.metadata['columns'] = get_job_columns(job)

            # remove unneeded metadata
            job.metadata.pop('display_columns', None)
            job.metadata.pop('tables', None)

            # create a stats record for this job
            Record.objects.create(time=job.end_time,
                                  resource_type='QUERY',
                                  resource={
                                      'job_id': job.id,
                                      'job_type': job.job_type,
                                      'query': job.query,
                                      'query_language': job.query_language,
                                      'sources':
                                      job.metadata.get('sources', [])
                                  },
                                  client_ip=job.client_ip,
                                  user=job.owner)

            job.save()

    return job.phase
Beispiel #5
0
def run_database_ingest_task(job_id, file_path):
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.stats.models import Record
    from daiquiri.query.utils import get_quota, ingest_table

    # get logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the database of the user if it not already exists
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()

            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(
                _('Quota is exceeded. Please remove some of your jobs.'))
            job.save()

            return job.phase

        # set database and start time
        job.pid = adapter.fetch_pid()
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # create the table and insert the data
        try:
            columns = ingest_table(job.schema_name, job.table_name, file_path)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # get additional information about the completed job
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # get timing and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # store the metadata for the columns from the VOTable
                job.metadata = {'columns': columns}

            # create a stats record for this job
            Record.objects.create(time=job.end_time,
                                  resource_type='UPLOAD',
                                  resource={
                                      'job_id': job.id,
                                      'job_type': job.job_type,
                                  },
                                  client_ip=job.client_ip,
                                  user=job.owner)

            job.save()

    return job.phase