def list(self, request, *args, **kwargs):
    # get the schema, table and column names from the query string
    schema_name = self.request.GET.get('schema')
    table_name = self.request.GET.get('table')
    column_names = self.request.GET.getlist('column')

    # get the columns which the user is allowed to access
    user_columns = get_user_columns(self.request.user, schema_name, table_name)

    if user_columns:
        # get the row query params from the request
        ordering, page, page_size, search, filters = self._get_query_params(user_columns)

        # restrict the requested column names to the allowed columns
        if column_names:
            column_names = [column.name for column in user_columns if column.name in column_names]
        else:
            column_names = [column.name for column in user_columns]

        # get the database adapter
        adapter = DatabaseAdapter()

        # query the database for the total number of rows
        count = adapter.count_rows(schema_name, table_name, column_names, search, filters)

        # query the paginated rowset
        results = adapter.fetch_rows(schema_name, table_name, column_names,
                                     ordering, page, page_size, search, filters)

        # return an ordered dict to be sent as JSON
        return Response(OrderedDict((
            ('count', count),
            ('results', fix_for_json(results)),
            ('next', self._get_next_url(page, page_size, count)),
            ('previous', self._get_previous_url(page))
        )))

    # if nothing worked, return 404
    raise NotFound()
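
# A minimal sketch of the _get_query_params() helper used above and in the
# archive view below. Everything in this sketch is an assumption made for
# illustration: the query string keys, the pagination defaults, and the
# normalization of column names (the view above passes column objects, the
# archive view passes dicts from the settings).
def _get_query_params(self, columns):
    ordering = self.request.GET.get('ordering')
    search = self.request.GET.get('search')

    # hypothetical defaults for pagination
    page = int(self.request.GET.get('page', '1'))
    page_size = int(self.request.GET.get('page_size', '10'))

    # accept both column objects and plain dicts
    column_names = [column['name'] if isinstance(column, dict) else column.name
                    for column in columns]

    # hypothetical: every remaining query parameter that matches a column
    # name is treated as a filter on that column
    filters = {key: value for key, value in self.request.GET.items()
               if key in column_names}

    return ordering, page, page_size, search, filters
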
def rows(self, column_names, ordering, page, page_size, search, filters):
    if self.phase == self.PHASE_COMPLETED:
        # check if the columns are actually in the job's table
        errors = {}
        for column_name in column_names:
            if column_name not in self.column_names:
                errors[column_name] = _('Column not found.')

        if errors:
            raise ValidationError(errors)

        # get the database adapter
        adapter = DatabaseAdapter()

        try:
            # query the database for the total number of rows
            count = adapter.count_rows(self.schema_name, self.table_name,
                                       column_names, search, filters)

            # query the paginated rowset
            rows = adapter.fetch_rows(self.schema_name, self.table_name, column_names,
                                      ordering, page, page_size, search, filters)

            # flatten the list if only one column was retrieved
            if len(column_names) == 1:
                return count, [element for row in rows for element in row]
            else:
                return count, rows

        except ProgrammingError:
            return 0, []

    else:
        raise ValidationError({'phase': ['Job is not COMPLETED.']})
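
# Illustrative calls of rows() above; the column names and argument values
# are made up. With a single requested column the rowset is flattened into
# a plain list of values, otherwise the full rowset is returned.
count, values = job.rows(['id'], ordering='id', page=1, page_size=10,
                         search=None, filters={})
count, rows = job.rows(['id', 'ra', 'dec'], ordering='id', page=1,
                       page_size=10, search=None, filters={})
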
def list(self, request, *args, **kwargs):
    # get the row query params from the request
    ordering, page, page_size, search, filters = self._get_query_params(settings.ARCHIVE_COLUMNS)

    # get the database adapter
    adapter = DatabaseAdapter()

    # get the schema_name and the table_name from the settings
    schema_name = settings.ARCHIVE_SCHEMA
    table_name = settings.ARCHIVE_TABLE

    # get the collections for this user and add them to the filters
    collections = [collection.name for collection
                   in Collection.objects.filter_by_access_level(request.user)]
    filters['collection'] = collections

    # get the names of the columns
    column_names = [column['name'] for column in settings.ARCHIVE_COLUMNS]

    # query the database for the total number of rows
    count = adapter.count_rows(schema_name, table_name, column_names, search, filters)

    # query the paginated rowset
    results = adapter.fetch_rows(schema_name, table_name, column_names,
                                 ordering, page, page_size, search, filters)

    # return an ordered dict to be sent as JSON
    return Response(OrderedDict((
        ('count', count),
        ('results', results),
        ('next', self._get_next_url(page, page_size, count)),
        ('previous', self._get_previous_url(page))
    )))
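
# A possible implementation of the _get_next_url() / _get_previous_url()
# helpers used by both list() methods, modeled on Django REST framework's
# pagination utilities. This is a sketch under the assumption that page
# numbering starts at 1; only the call sites above are from the source.
from rest_framework.utils.urls import replace_query_param

def _get_next_url(self, page, page_size, count):
    # no next page once the current page reaches the end of the rowset
    if page * page_size >= count:
        return None
    url = self.request.build_absolute_uri()
    return replace_query_param(url, 'page', page + 1)

def _get_previous_url(self, page):
    # no previous page before page 2
    if page <= 1:
        return None
    url = self.request.build_absolute_uri()
    return replace_query_param(url, 'page', page - 1)
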
def run_database_query_task(job_id):
    # always import daiquiri packages inside the task
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.query.utils import get_quota, get_job_sources, get_job_columns, ingest_uploads
    from daiquiri.stats.models import Record

    # get the logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the schema of the user if it does not already exist
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()
            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(_('Quota is exceeded. Please remove some of your jobs.'))
            job.save()
            return job.phase

        # store the database pid, the actual query, and the start time
        job.pid = adapter.fetch_pid()
        job.actual_query = adapter.build_query(job.schema_name, job.table_name,
                                               job.native_query, job.timeout, job.max_records)
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # submit the actual query to the database
        try:
            ingest_uploads(job.uploads, job.owner)

            # this is where the work is done (and the time is spent)
            adapter.submit_query(job.actual_query)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # mark the job as completed
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # set the end time and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # fetch the metadata for the used tables
                job.metadata['sources'] = get_job_sources(job)

                # fetch the metadata for the columns and additional metadata from the metadata store
                job.metadata['columns'] = get_job_columns(job)

                # remove unneeded metadata
                job.metadata.pop('display_columns', None)
                job.metadata.pop('tables', None)

            # create a stats record for this job
            Record.objects.create(
                time=job.end_time,
                resource_type='QUERY',
                resource={
                    'job_id': job.id,
                    'job_type': job.job_type,
                    'query': job.query,
                    'query_language': job.query_language,
                    'sources': job.metadata.get('sources', [])
                },
                client_ip=job.client_ip,
                user=job.owner
            )

            job.save()

    return job.phase
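
# How this task is presumably run: the job_id signature and the returned
# job.phase suggest an asynchronous task queue such as Celery. A minimal
# sketch, assuming a @shared_task wrapper; the decorator and the dispatch
# are assumptions, not taken from the source.
from celery import shared_task

@shared_task
def run_database_query(job_id):
    return run_database_query_task(job_id)

# enqueue for a worker, or call synchronously and inspect the final phase:
# run_database_query.delay(job.id)
# phase = run_database_query_task(job.id)
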
def run_database_ingest_task(job_id, file_path):
    # always import daiquiri packages inside the task
    from daiquiri.core.adapter import DatabaseAdapter
    from daiquiri.query.models import QueryJob
    from daiquiri.stats.models import Record
    from daiquiri.query.utils import get_quota, ingest_table

    # get the logger
    logger = logging.getLogger(__name__)

    # get the job object from the database
    job = QueryJob.objects.get(pk=job_id)

    if job.phase == job.PHASE_QUEUED:
        # get the adapter with the database specific functions
        adapter = DatabaseAdapter()

        # create the schema of the user if it does not already exist
        try:
            adapter.create_user_schema_if_not_exists(job.schema_name)
        except OperationalError as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            job.save()
            return job.phase

        # check if the quota is exceeded
        if QueryJob.objects.get_size(job.owner) > get_quota(job.owner):
            job.phase = job.PHASE_ERROR
            job.error_summary = str(_('Quota is exceeded. Please remove some of your jobs.'))
            job.save()
            return job.phase

        # store the database pid and the start time
        job.pid = adapter.fetch_pid()
        job.phase = job.PHASE_EXECUTING
        job.start_time = now()
        job.save()

        logger.info('job %s started' % job.id)

        # create the table and insert the data
        try:
            columns = ingest_table(job.schema_name, job.table_name, file_path)

        except (ProgrammingError, InternalError, ValueError) as e:
            job.phase = job.PHASE_ERROR
            job.error_summary = str(e)
            logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        except OperationalError as e:
            # load the job again and check if the job was killed
            job = QueryJob.objects.get(pk=job_id)

            if job.phase != job.PHASE_ABORTED:
                job.phase = job.PHASE_ERROR
                job.error_summary = str(e)
                logger.info('job %s failed (%s)' % (job.id, job.error_summary))

        else:
            # mark the job as completed
            job.phase = job.PHASE_COMPLETED
            logger.info('job %s completed' % job.id)

        finally:
            # set the end time and save the job object
            job.end_time = now()

            # get additional information about the completed job
            if job.phase == job.PHASE_COMPLETED:
                job.nrows = adapter.count_rows(job.schema_name, job.table_name)
                job.size = adapter.fetch_size(job.schema_name, job.table_name)

                # store the metadata for the columns from the VOTable
                job.metadata = {'columns': columns}

            # create a stats record for this job
            Record.objects.create(
                time=job.end_time,
                resource_type='UPLOAD',
                resource={
                    'job_id': job.id,
                    'job_type': job.job_type,
                },
                client_ip=job.client_ip,
                user=job.owner
            )

            job.save()

    return job.phase
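
# Hypothetical end-to-end use of the ingest task: run it with the job id and
# the path of the uploaded file, then reload the job to read the metadata
# the task stored. The file path is a placeholder, not from the source.
phase = run_database_ingest_task(job.id, '/path/to/upload.votable')
job.refresh_from_db()
if phase == job.PHASE_COMPLETED:
    print('ingested %d rows, %d bytes, %d columns' %
          (job.nrows, job.size, len(job.metadata['columns'])))
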