Example #1
    def handle(self, *args, **options):
        if options['user']:
            if options['user'] == 'anonymous':
                owners = [None]
            else:
                owners = [User.objects.get(username=options['user'])]

        else:
            owners = [None] + list(User.objects.all())

        adapter = DatabaseAdapter()

        stale_jobs = []
        for owner in owners:
            jobs = QueryJob.objects.filter(owner=owner)
            for job in jobs:
                if job.phase == job.PHASE_COMPLETED:
                    if not adapter.fetch_table(job.schema_name, job.table_name):
                        stale_jobs.append(job)

        if stale_jobs:
            print('The following QueryJobs have no associated database table:')

            for job in stale_jobs:
                username = job.owner.username if job.owner else 'anonymous'
                print('%s by %s -> %s.%s' % (job.id, username, job.schema_name, job.table_name))

            if options['archive']:
                for job in stale_jobs:
                    job.archive()

                print('The jobs have been archived.')
        else:
            print('No QueryJobs without an associated database table have been found.')
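
For completeness, handle() above reads options['user'] and options['archive']; a minimal add_arguments() sketch that would supply them (a hedged reconstruction, not necessarily the project's actual definition):

    def add_arguments(self, parser):
        # optional positional username; omitting it means "check all owners"
        parser.add_argument('user', nargs='?', default=None,
                            help='Check only the jobs of this user.')
        # flag that triggers job.archive() for every stale job found
        parser.add_argument('--archive', action='store_true',
                            help='Archive the stale jobs after listing them.')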
Example #2
    def create(self, request, *args, **kwargs):

        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        table = serializer.save()

        if request.data.get('discover'):
            adapter = DatabaseAdapter()

            for column_metadata in adapter.fetch_columns(
                    table.schema.name, table.name):
                column_metadata['table'] = table.id
                column_metadata['groups'] = [
                    group.id for group in table.groups.all()
                ]
                for key in ['access_level', 'metadata_access_level']:
                    column_metadata[key] = getattr(table, key)

                column_serializer = ColumnSerializer(data=column_metadata)
                if column_serializer.is_valid():
                    column_serializer.save()

        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data,
                        status=status.HTTP_201_CREATED,
                        headers=headers)
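
A hedged usage sketch for the discover branch above, using DRF's test client; the endpoint path and payload fields are assumptions for illustration:

    from rest_framework.test import APIClient

    client = APIClient()
    response = client.post('/metadata/api/tables/', {
        'schema': 1,          # hypothetical schema id
        'name': 'new_table',
        'discover': True      # triggers the column discovery loop above
    }, format='json')
    assert response.status_code == 201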
Example #3
    def clean(self, request, resource):
        if resource not in self.get_resources():
            raise NotFound()

        adapter = DatabaseAdapter()

        self.sql = '''
SELECT %(id_field)s, %(ra_field)s, %(dec_field)s
FROM %(schema)s.%(table)s
WHERE
    %(ra_field)s BETWEEN (%%(RA)s - 0.5 * %%(SR)s) AND (%%(RA)s + 0.5 * %%(SR)s)
AND
    %(dec_field)s BETWEEN (%%(DEC)s - 0.5 * %%(SR)s) AND (%%(DEC)s + 0.5 * %%(SR)s)
LIMIT %(limit)s
''' % {
            'id_field': adapter.escape_identifier('id'),
            'ra_field': adapter.escape_identifier('ra'),
            'dec_field': adapter.escape_identifier('dec'),
            'schema': settings.CONESEARCH_SCHEMA,
            'table': settings.CONESEARCH_TABLE,
            'limit': self.max_records
        }

        self.args, errors = self.parse_query_dict(request)

        if errors:
            raise ValidationError(errors)
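
Note the two-stage interpolation in the template above: the % pass fills in the escaped identifiers and collapses %% to %, so %%(RA)s survives as %(RA)s for later DB-API parameter binding. A minimal illustration:

    # the first % pass resolves identifiers and collapses %% to %
    sql = 'WHERE %(ra_field)s > %%(RA)s' % {'ra_field': '"ra"'}
    assert sql == 'WHERE "ra" > %(RA)s'
    # the remaining placeholder is then bound safely by the driver, e.g.
    # cursor.execute(sql, {'RA': 41.1, 'DEC': -12.3, 'SR': 0.1})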
Example #4
    def discover(self, request):
        schema_name = request.GET.get('schema')
        table_name = request.GET.get('table')

        if schema_name and table_name:
            adapter = DatabaseAdapter()
            table_metadata = adapter.fetch_table(schema_name, table_name)
            table_metadata['nrows'] = adapter.fetch_nrows(
                schema_name, table_name)
            table_metadata['size'] = adapter.fetch_size(
                schema_name, table_name)
            return Response([table_metadata])
        else:
            return Response([])
Example #5
def get_job_column(job, display_column_name):
    try:
        schema_name, table_name, column_name = \
            job.metadata['display_columns'][display_column_name]
    except (ValueError, KeyError):
        return {}

    if schema_name == settings.TAP_UPLOAD:
        # for TAP_UPLOAD get the information directly from the database
        return DatabaseAdapter().fetch_column(schema_name, table_name,
                                              column_name)

    else:
        # for regular schemas consult the metadata store
        try:
            column = Column.objects.get(name=column_name,
                                        table__name=table_name,
                                        table__schema__name=schema_name)

            return {
                'name': column.name,
                'description': column.description,
                'unit': column.unit,
                'ucd': column.ucd,
                'utype': column.utype,
                'datatype': column.datatype,
                'arraysize': column.arraysize,
                'principal': column.principal,
                'indexed': False,
                'std': column.std
            }

        except Column.DoesNotExist:
            return {}
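
A short usage sketch: resolving the metadata for one display column of a job (the column name is a placeholder); an empty dict signals that the column could not be resolved:

    column_metadata = get_job_column(job, 'ra')   # {} if unresolvable
    unit = column_metadata.get('unit')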
Example #6
    def get_datalink_list(self):
        for table in self.tables:
            schema_name, table_name = table.split('.')
            for row in DatabaseAdapter().fetch_rows(schema_name,
                                                    table_name,
                                                    page_size=0):
                yield 'datalink', row
Example #7
    def test_fetch_rows_filter(self):
        rows = DatabaseAdapter().fetch_rows('daiquiri_data_sim',
                                            'halos',
                                            filters={'id': '85000000000'})
        self.assertEqual(len(rows), 1)
        self.assertEqual(len(rows[0]), 8)
        self.assertEqual(rows[0][0], 85000000000)
Example #8
def process_display_columns(processor_display_columns):
    # process display_columns to expand *
    display_columns = []
    for processor_display_column, original_column in processor_display_columns:
        if processor_display_column == '*':
            schema_name, table_name, tmp = original_column
            for column_name in DatabaseAdapter().fetch_column_names(
                    schema_name, table_name):
                display_columns.append(
                    (column_name, (schema_name, table_name, column_name)))

        else:
            display_columns.append((processor_display_column, original_column))

    # check for duplicate columns in display_columns
    seen = set()
    errors = []
    for display_column_name, display_column in display_columns:
        if display_column_name not in seen:
            seen.add(display_column_name)
        else:
            errors.append(
                _('Duplicate column name %(column)s') %
                {'column': display_column_name})

    if errors:
        raise ValidationError({'query': errors})

    return OrderedDict(display_columns)
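
A hedged input/output sketch for process_display_columns(); the schema and table names are placeholders:

    # a '*' entry is expanded to one (name, (schema, table, name)) pair
    # for every column of the referenced table
    display_columns = process_display_columns([
        ('*', ('daiquiri_data_sim', 'halos', None)),
    ])
    # -> OrderedDict([('id', ('daiquiri_data_sim', 'halos', 'id')), ...])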
Example #9
    def test_fetch_row(self):
        row = DatabaseAdapter().fetch_row(
            'daiquiri_data_obs',
            'stars',
            column_names=None,
            search=None,
            filters={'id': '4551299946478123136'})
        self.assertEqual(row[0], 4551299946478123136)
Example #10
def drop_database_table_task(schema_name, table_name):
    from daiquiri.core.adapter import DatabaseAdapter

    # drop the corresponding database table, but fail silently
    try:
        DatabaseAdapter().drop_table(schema_name, table_name)
    except ProgrammingError:
        pass
Example #11
def abort_database_query_task(pid):
    from daiquiri.core.adapter import DatabaseAdapter

    # abort the job on the database
    try:
        DatabaseAdapter().abort_query(pid)
    except OperationalError:
        # the query was probably killed before
        pass
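
Both task functions above read like Celery tasks with their decorators cropped out of the excerpt; if so, a deferred invocation would look roughly like this (an assumption, shown for context only):

    # hypothetical asynchronous invocations via Celery
    drop_database_table_task.apply_async(args=['daiquiri_user_alice', 'old_table'])
    abort_database_query_task.apply_async(args=[12345])  # database process id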
Example #12
def process_query(query):
    # get the adapter
    adapter = DatabaseAdapter()

    try:
        if adapter.database_config['ENGINE'] == 'django.db.backends.mysql':

            from queryparser.mysql import MySQLQueryProcessor
            processor = MySQLQueryProcessor(query)

        elif adapter.database_config[
                'ENGINE'] == 'django.db.backends.postgresql':

            from queryparser.postgresql import PostgreSQLQueryProcessor

            if settings.QUERY_PROCESSOR_CACHE:
                processor = cache.get_or_set('processor',
                                             PostgreSQLQueryProcessor(), 3600)
            else:
                processor = PostgreSQLQueryProcessor()

            # first run: replace the schema names and handle indexed objects
            processor.set_query(query)
            processor.process_query(indexed_objects=get_indexed_objects(),
                                    replace_schema_name={
                                        'TAP_SCHEMA': settings.TAP_SCHEMA,
                                        'tap_schema': settings.TAP_SCHEMA,
                                        'TAP_UPLOAD': settings.TAP_UPLOAD,
                                        'tap_upload': settings.TAP_UPLOAD,
                                    })

            # second run
            processor.set_query(processor.query)
            processor.process_query()

        else:
            raise Exception('Unknown database engine')

    except QuerySyntaxError as e:
        raise ValidationError({
            'query': {
                'messages':
                [_('There has been an error while parsing your query.')],
                'positions':
                json.dumps(e.syntax_errors),
            }
        })

    except QueryError as e:
        raise ValidationError({'query': {
            'messages': e.messages,
        }})

    return processor
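
A brief usage sketch: the returned processor exposes the processed query on its query attribute (as used in the "second run" above); the query text is a placeholder:

    processor = process_query('SELECT ra, dec FROM daiquiri_data_obs.stars')
    native_query = processor.query  # the fully processed query string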
Example #13
    def retrieve(self, request, pk=None):
        # get database adapter
        adapter = DatabaseAdapter()

        # get the schema_name and the table_name from the settings
        schema_name = settings.ARCHIVE_SCHEMA
        table_name = settings.ARCHIVE_TABLE

        # get collections for this user
        collections = [
            collection.name for collection in
            Collection.objects.filter_by_access_level(request.user)
        ]

        # fetch the path for this file from the database
        row = adapter.fetch_row(schema_name,
                                table_name, ['path'],
                                filters={
                                    'id': pk,
                                    'collection': collections
                                })

        if row:
            if request.GET.get('download', True):
                # create a stats record for this download
                Record.objects.create(time=now(),
                                      resource_type='ARCHIVE_DOWNLOAD',
                                      resource=row[0],
                                      client_ip=get_client_ip(request),
                                      user=request.user if
                                      request.user.is_authenticated else None)

                # send the file to the client
                file_path = os.path.join(settings.ARCHIVE_BASE_PATH, row[0])
                return sendfile(request, file_path, attachment=True)
            else:
                # send an empty response
                return Response()

        # if the file was not found, return 404
        raise NotFound()
Example #14
    def discover(self, request):
        schema_name = request.GET.get('schema')
        table_name = request.GET.get('table')
        column_name = request.GET.get('column')

        if schema_name and table_name and column_name:
            return Response([
                DatabaseAdapter().fetch_column(schema_name, table_name,
                                               column_name)
            ])
        else:
            return Response([])
Example #15
    def list(self, request, *args, **kwargs):

        # get schema, table and column_names from the querystring
        schema_name = self.request.GET.get('schema')
        table_name = self.request.GET.get('table')
        column_names = self.request.GET.getlist('column')

        # get the columns which the user is allowed to access
        user_columns = get_user_columns(self.request.user, schema_name, table_name)

        if user_columns:
            # get the row query params from the request
            ordering, page, page_size, search, filters = self._get_query_params(user_columns)

            # filter the requested column names by the allowed columns
            if column_names:
                column_names = [column.name for column in user_columns if column.name in column_names]
            else:
                column_names = [column.name for column in user_columns]

            # get database adapter
            adapter = DatabaseAdapter()

            # query the database for the total number of rows
            count = adapter.count_rows(schema_name, table_name, column_names, search, filters)

            # query the paginated rowset
            results = adapter.fetch_rows(schema_name, table_name, column_names, ordering, page, page_size, search, filters)

            # return an ordered dict to be sent as JSON
            return Response(OrderedDict((
                ('count', count),
                ('results', fix_for_json(results)),
                ('next', self._get_next_url(page, page_size, count)),
                ('previous', self._get_previous_url(page))
            )))

        # if nothing worked, return 404
        raise NotFound()
Example #16
    def run_sync(self):
        adapter = DatabaseAdapter()

        self.actual_query = adapter.build_sync_query(
            self.native_query, settings.QUERY_SYNC_TIMEOUT, self.max_records)

        job_sources = get_job_sources(self)

        # create a stats record for this job
        Record.objects.create(time=now(),
                              resource_type='QUERY',
                              resource={
                                  'job_id': None,
                                  'job_type': self.job_type,
                                  'query': self.query,
                                  'query_language': self.query_language,
                                  'sources': job_sources
                              },
                              client_ip=self.client_ip,
                              user=self.owner)

        try:
            download_adapter = DownloadAdapter()

            yield from generate_votable(
                adapter.fetchall(self.actual_query),
                get_job_columns(self),
                table=download_adapter.get_table_name(self.schema_name,
                                                      self.table_name),
                infos=download_adapter.get_infos('OK', self.query,
                                                 self.query_language,
                                                 job_sources),
                links=download_adapter.get_links(job_sources))
            self.drop_uploads()

        except (OperationalError, ProgrammingError, InternalError,
                DataError):
            # PEP 479: raising StopIteration inside a generator surfaces as a
            # RuntimeError, so end the stream with a plain return instead
            return
Example #17
    def handle(self, *args, **options):
        if options['user']:
            usernames = [options['user']]
        else:
            usernames = ['anonymous'] + list(User.objects.values_list('username', flat=True))

        adapter = DatabaseAdapter()

        stale_tables = []
        for username in usernames:
            schema_name = settings.QUERY_USER_SCHEMA_PREFIX + username

            tables = adapter.fetch_tables(schema_name)

            for table in tables:
                job = QueryJob.objects.filter(
                    schema_name=schema_name,
                    table_name=table['name']
                ).first()

                # flag tables that have no QueryJob at all, or whose job is in
                # a phase that should not leave a table behind
                if not job or job.phase not in [QueryJob.PHASE_EXECUTING, QueryJob.PHASE_COMPLETED]:
                    stale_tables.append((schema_name, table['name'], job.phase if job else None))

        if stale_tables:
            print('The following database tables have no associated QueryJob:')

            for stale_table in stale_tables:
                print('%s.%s -> %s' % stale_table)

            if options['delete']:
                for schema_name, table_name, phase in stale_tables:
                    adapter.drop_table(schema_name, table_name)

                print('The tables have been deleted.')
        else:
            print('No tables without associated QueryJob have been found.')
Example #18
    def rows(self, column_names, ordering, page, page_size, search, filters):
        if self.phase == self.PHASE_COMPLETED:
            # check if the columns are actually in the jobs table
            errors = {}

            for column_name in column_names:
                if column_name not in self.column_names:
                    errors[column_name] = _('Column not found.')

            if errors:
                raise ValidationError(errors)

            # get database adapter
            adapter = DatabaseAdapter()

            try:
                # query the database for the total number of rows
                count = adapter.count_rows(self.schema_name, self.table_name,
                                           column_names, search, filters)

                # query the paginated rowset
                rows = adapter.fetch_rows(self.schema_name, self.table_name,
                                          column_names, ordering, page,
                                          page_size, search, filters)

                # flatten the list if only one column is retrieved
                if len(column_names) == 1:
                    return count, [element for row in rows for element in row]
                else:
                    return count, rows

            except ProgrammingError:
                return 0, []

        else:
            raise ValidationError({'phase': ['Job is not COMPLETED.']})
Example #19
    def list(self, request, *args, **kwargs):
        # get the row query params from the request
        ordering, page, page_size, search, filters = self._get_query_params(
            settings.ARCHIVE_COLUMNS)

        # get database adapter
        adapter = DatabaseAdapter()

        # get the schema_name and the table_name from the settings
        schema_name = settings.ARCHIVE_SCHEMA
        table_name = settings.ARCHIVE_TABLE

        # get collections for this user and add them to the filters
        collections = [
            collection.name for collection in
            Collection.objects.filter_by_access_level(request.user)
        ]
        filters['collection'] = collections

        # get the name of the columns
        column_names = [column['name'] for column in settings.ARCHIVE_COLUMNS]

        # query the database for the total number of rows
        count = adapter.count_rows(schema_name, table_name, column_names,
                                   search, filters)

        # query the paginated rowset
        results = adapter.fetch_rows(schema_name, table_name, column_names,
                                     ordering, page, page_size, search,
                                     filters)

        # return an ordered dict to be sent as JSON
        return Response(
            OrderedDict((('count', count), ('results', results),
                         ('next', self._get_next_url(page, page_size, count)),
                         ('previous', self._get_previous_url(page)))))
Example #20
def get_job_columns(job):
    columns = []

    if job.phase == job.PHASE_COMPLETED:
        database_columns = DatabaseAdapter().fetch_columns(
            job.schema_name, job.table_name)

        for database_column in database_columns:
            column = get_job_column(job, database_column['name'])
            column.update(database_column)
            columns.append(column)

    else:
        for display_column in job.metadata['display_columns']:
            columns.append(get_job_column(job, display_column))

    return columns
Example #21
    def _test_detail_viewset(self, username):

        schema_name = settings.ARCHIVE_SCHEMA
        table_name = settings.ARCHIVE_TABLE

        rows = DatabaseAdapter().fetch_rows(schema_name, table_name, ['id', 'collection'], None, None, 0, None, None)

        for row in rows:

            url = reverse(self.url_name, kwargs={'pk': row[0]})
            status_code = self.get_status_code(username, row[1])

            response = self.client.get(url)

            msg = OrderedDict((
                ('username', username),
                ('url', url),
                ('row', row),
                ('status_code', response.status_code)
            ))

            self.assertEqual(response.status_code, status_code, msg=msg)
Example #22
    def handle(self, *args, **options):
        # look for completed jobs with no table
        queryset = QueryJob.objects.filter(phase=QueryJob.PHASE_COMPLETED)

        if options['user']:
            queryset = queryset.filter(owner__username=options['user'])

        for job in queryset:
            try:
                DatabaseAdapter().fetch_size(job.schema_name, job.table_name)
            except ProgrammingError:
                try:
                    job.phase = QueryJob.PHASE_PENDING

                    if options['queue']:
                        job.queue = options['queue']

                    job.process()

                    print('Run %s by %s again.' % (job.id, job.owner))

                    if not options['dry']:
                        job.run()

                except ValidationError as e:
                    job.phase = QueryJob.PHASE_ERROR

                    job.error_summary = ''
                    for key, errors in e.detail.items():
                        try:
                            job.error_summary += ''.join(errors['messages'])
                        except TypeError:
                            job.error_summary += ''.join(errors)

                    print('Error for %s by %s: %s' %
                          (job.id, job.owner, job.error_summary))

                    if not options['dry']:
                        job.save()
Example #23
    def process(self):
        if self.job.phase == self.PHASE_COMPLETED:
            self.owner = self.job.owner
        else:
            raise ValidationError({'phase': ['Job is not COMPLETED.']})

        if not self.column_name:
            raise ValidationError(
                {'column_name': [_('This field may not be blank.')]})

        if self.column_name not in self.job.column_names:
            raise ValidationError({
                'column_name': [_('Unknown column "%s".') % self.column_name]
            })

        # get database adapter and query the paginated rowset
        rows = DatabaseAdapter().fetch_rows(self.job.schema_name,
                                            self.job.table_name,
                                            [self.column_name],
                                            page_size=0)

        # prepare list of files for this job
        files = []
        for row in rows:
            file_path = row[0]

            # append the file to the list of files if it exists
            if file_path and check_file(self.owner, file_path):
                files.append(file_path)
            else:
                raise ValidationError({
                    'files': [_('One or more of the files cannot be found.')]
                })

        # set files for this job
        self.files = files

        # set clean flag
        self.is_clean = True
Example #24
def ingest_table(schema_name, table_name, file_path, drop_table=False):
    adapter = DatabaseAdapter()

    table = parse_single_table(file_path, pedantic=False)

    columns = []
    for field in table.fields:
        columns.append({
            'name': field.name,
            'datatype': field.datatype,
            'ucd': field.ucd,
            'unit': str(field.unit),
        })

    if drop_table:
        adapter.drop_table(schema_name, table_name)

    adapter.create_table(schema_name, table_name, columns)
    adapter.insert_rows(schema_name, table_name, columns, table.array,
                        table.array.mask)

    os.remove(file_path)

    return columns
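
A hedged usage sketch for ingest_table() (parse_single_table above comes from astropy.io.votable); note that the function removes the source file on success, so the caller hands over ownership of file_path. All names are placeholders:

    columns = ingest_table('daiquiri_user_alice', 'upload_1',
                           '/tmp/upload_1.votable', drop_table=True)
    # columns is the list of column dicts parsed from the VOTable header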
Example #25
def translate_query(query_language, query):
    # get the adapter
    adapter = DatabaseAdapter()

    # translate adql -> mysql string
    if query_language == 'adql-2.0':
        try:
            translator = cache.get_or_set('translator', ADQLQueryTranslator(),
                                          3600)
            translator.set_query(query)

            if adapter.database_config['ENGINE'] == 'django.db.backends.mysql':
                return translator.to_mysql()
            elif adapter.database_config[
                    'ENGINE'] == 'django.db.backends.postgresql':
                return translator.to_postgresql()
            else:
                raise Exception('Unknown database engine')

        except QuerySyntaxError as e:
            raise ValidationError({
                'query': {
                    'messages': [
                        _('There has been an error while translating your query.'
                          )
                    ],
                    'positions':
                    json.dumps(e.syntax_errors),
                }
            })

        except QueryError as e:
            raise ValidationError({'query': {
                'messages': e.messages,
            }})

    else:
        return query
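
Usage sketch: the language tag 'adql-2.0' matches the branch above, anything else passes through unchanged:

    sql = translate_query('adql-2.0',
                          'SELECT TOP 10 ra, dec FROM daiquiri_data_obs.stars')
    # for any other query_language the query is returned verbatim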
Example #26
    def process(self):
        # get collections for the owner of this download job
        collections = [
            collection.name for collection in
            Collection.objects.filter_by_access_level(self.owner)
        ]

        # get database adapter
        adapter = DatabaseAdapter()

        # get the schema_name and the table_name from the settings
        schema_name = settings.ARCHIVE_SCHEMA
        table_name = settings.ARCHIVE_TABLE

        # prepare list of files for this archive job
        files = []

        if 'file_ids' in self.data:
            if isinstance(self.data, QueryDict):
                file_ids = self.data.getlist('file_ids')
            else:
                file_ids = self.data.get('file_ids')

            for file_id in file_ids:
                # validate that the file_id is a valid UUID4
                try:
                    uuid.UUID(file_id, version=4)
                except ValueError:
                    raise ValidationError({
                        'files': [
                            _('One or more of the identifiers are not valid UUIDs.'
                              )
                        ]
                    })

                # fetch the path for this file from the database
                row = adapter.fetch_row(schema_name,
                                        table_name, ['path'],
                                        filters={
                                            'id': file_id,
                                            'collection': collections
                                        })

                # append the file to the list of files only if it exists in the database and on the filesystem
                if row and os.path.isfile(
                        os.path.join(settings.ARCHIVE_BASE_PATH, row[0])):
                    files.append(row[0])
                else:
                    raise ValidationError({
                        'files':
                        [_('One or more of the files cannot be found.')]
                    })

        elif 'search' in self.data:
            # retrieve the paths of all files matching the search criteria
            rows = adapter.fetch_rows(schema_name,
                                      table_name,
                                      page_size=0,
                                      search=self.data['search'],
                                      filters={'collection': collections})

            # get the index of the path column in the row
            path_index = next(
                (i for i, column in enumerate(settings.ARCHIVE_COLUMNS)
                 if column['name'] == 'path'))

            for row in rows:
                # append the file to the list of files only if it exists on the filesystem
                if os.path.isfile(
                        os.path.join(settings.ARCHIVE_BASE_PATH,
                                     row[path_index])):
                    files.append(row[path_index])
                else:
                    raise ValidationError({
                        'files':
                        [_('One or more of the files cannot be found.')]
                    })

        else:
            raise ValidationError([_('No data received.')])

        # set files and file_path for this archive job
        self.files = files

        # set clean flag
        self.is_clean = True
Example #27
def run_database_query_task(job_id):
    # always import daiquiri packages inside the task
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.query.utils import get_quota, get_job_sources, get_job_columns, ingest_uploads
    from daiquiri.stats.models import Record

    # get logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the user's schema if it does not already exist
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()

            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(
                _('Quota is exceeded. Please remove some of your jobs.'))
            job.save()

            return job.phase

        # set the pid, the actual query, the phase and the start time
        job.pid = adapter.fetch_pid()
        job.actual_query = adapter.build_query(job.schema_name, job.table_name,
                                               job.native_query, job.timeout,
                                               job.max_records)
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # get the actual query and submit the job to the database
        try:
            ingest_uploads(job.uploads, job.owner)

            # this is where the work is done (and the time is spent)
            adapter.submit_query(job.actual_query)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # get additional information about the completed job
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # get timing and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # fetch the metadata for used tables
                job.metadata['sources'] = get_job_sources(job)

                # fetch the metadata for the columns and fetch additional metadata from the metadata store
                job.metadata['columns'] = get_job_columns(job)

            # remove unneeded metadata
            job.metadata.pop('display_columns', None)
            job.metadata.pop('tables', None)

            # create a stats record for this job
            Record.objects.create(time=job.end_time,
                                  resource_type='QUERY',
                                  resource={
                                      'job_id': job.id,
                                      'job_type': job.job_type,
                                      'query': job.query,
                                      'query_language': job.query_language,
                                      'sources':
                                      job.metadata.get('sources', [])
                                  },
                                  client_ip=job.client_ip,
                                  user=job.owner)

            job.save()

    return job.phase
Example #28
def rename_database_table_task(schema_name, table_name, new_table_name):
    from daiquiri.core.adapter import DatabaseAdapter

    DatabaseAdapter().rename_table(schema_name, table_name, new_table_name)
Example #29
def run_database_ingest_task(job_id, file_path):
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.stats.models import Record
    from daiquiri.query.utils import get_quota, ingest_table

    # get logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the user's schema if it does not already exist
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()

            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(
                _('Quota is exceeded. Please remove some of your jobs.'))
            job.save()

            return job.phase

        # set the pid, the phase and the start time
        job.pid = adapter.fetch_pid()
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # create the table and insert the data
        try:
            columns = ingest_table(job.schema_name, job.table_name, file_path)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # get additional information about the completed job
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # get timing and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # store the metadata for the columns from the VOTable
                job.metadata = {'columns': columns}

            # create a stats record for this job
            Record.objects.create(time=job.end_time,
                                  resource_type='UPLOAD',
                                  resource={
                                      'job_id': job.id,
                                      'job_type': job.job_type,
                                  },
                                  client_ip=job.client_ip,
                                  user=job.owner)

            job.save()

    return job.phase
Example #30
def check_permissions(user, keywords, tables, columns, functions):
    messages = []

    # check keywords against the whitelist (currently a no-op placeholder)
    for keyword in keywords:
        pass

    # loop over tables to check permissions on schemas/tables
    for schema_name, table_name in tables:

        # check permission on schema
        if schema_name is None:
            # schema_name must not be null, move to next table
            messages.append(_('No schema given for table %s.') % table_name)
            continue
        elif schema_name == get_user_schema_name(user):
            # all tables in the user's own schema are allowed, move on to the next table
            continue
        elif schema_name == settings.TAP_UPLOAD:
            # all tables in TAP_UPLOAD are allowed, move on to the next table
            continue
        else:
            # check permissions on the schema
            try:
                schema = Schema.objects.filter_by_access_level(user).get(
                    name=schema_name)
            except Schema.DoesNotExist:
                # schema not found or not allowed, move to next table
                messages.append(_('Schema %s not found.') % schema_name)
                continue

        # check permission on table
        if table_name is None:
            # table_name must not be null, move to next table
            messages.append(_('No table given for schema %s.') % schema_name)
            continue
        else:
            try:
                Table.objects.filter_by_access_level(user).filter(
                    schema=schema).get(name=table_name)
            except Table.DoesNotExist:
                # table not found or not allowed, move to next table
                messages.append(_('Table %s not found.') % table_name)
                continue

    # loop over columns to check permissions or just to see if they are there,
    # but only if no error messages were appended so far
    if not messages:

        for schema_name, table_name, column_name in columns:

            if schema_name in [None, get_user_schema_name(user), settings.TAP_UPLOAD] \
                    or table_name is None \
                    or column_name is None:
                # doesn't need to be checked, move to next column
                continue
            else:
                if not settings.METADATA_COLUMN_PERMISSIONS:
                    # just check if the column exists
                    if column_name == '*':
                        # doesn't need to be checked, move to next table
                        continue

                    else:
                        try:
                            Column.objects.filter(
                                table__schema__name=schema_name).filter(
                                    table__name=table_name).get(
                                        name=column_name)
                        except Column.DoesNotExist:
                            messages.append(
                                _('Column %s not found.') % column_name)
                            continue
                else:
                    try:
                        schema = Schema.objects.filter_by_access_level(
                            user).get(name=schema_name)
                    except Schema.DoesNotExist:
                        messages.append(
                            _('Schema %s not found.') % schema_name)
                        continue

                    try:
                        table = Table.objects.filter_by_access_level(
                            user).filter(schema=schema).get(name=table_name)
                    except Table.DoesNotExist:
                        messages.append(_('Table %s not found.') % table_name)
                        continue

                    if column_name == '*':
                        columns = Column.objects.filter_by_access_level(
                            user).filter(table=table)
                        actual_columns = DatabaseAdapter().fetch_columns(
                            schema_name, table_name)

                        column_names_set = set(
                            [column.name for column in columns])
                        actual_column_names_set = set(
                            [column['name'] for column in actual_columns])

                        if column_names_set != actual_column_names_set:
                            messages.append(
                                _('The asterisk (*) is not allowed for this table.'
                                  ))
                            continue

                    else:
                        try:
                            Column.objects.filter_by_access_level(
                                user).filter(table=table).get(name=column_name)
                        except Column.DoesNotExist:
                            messages.append(
                                _('Column %s not found.') % column_name)
                            continue

    # check permissions on functions
    for function_name in functions:

        # check permission on function
        queryset = Function.objects.filter(name=function_name)

        # forbid the function if it is in metadata.functions and the user doesn't have access
        if queryset and not queryset.filter_by_access_level(user):
            messages.append(_('Function %s is not allowed.') % function_name)
        else:
            continue

    # return the error stack
    return list(set(messages))
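
Finally, a hedged sketch of how the permission check above is typically driven: the tuples come from a query parser, and a non-empty return value is turned into a ValidationError (variable names are assumptions):

    messages = check_permissions(
        user,
        keywords=[],
        tables=[('daiquiri_data_obs', 'stars')],
        columns=[('daiquiri_data_obs', 'stars', 'ra')],
        functions=[])
    if messages:
        raise ValidationError({'query': {'messages': messages}})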