def on_get(self, request, resp, **kwargs):
    """ Respond with a representation of the current DWSupport config

    Supports a Python-module dump ('py', the default) and a JSON dump,
    selected via the 'format' URL keyword argument.
    """
    session_user = auth.get_user_id(request)
    with warehouse.get_source_model_session() as dwsupport_model:
        if not management_auth.is_management_permitted(
                session_user, dwsupport_model):
            msg = 'Warehouse management not authorized'
            raise falcon.HTTPUnauthorized(title='401', description=msg)
        # authorized; validate the requested dump format
        requested_format = kwargs['format'].lower()
        if requested_format not in formats:
            msg = 'Dump format not found. Leave blank for Python (py) or include a supported format in dump URL: {}'.format(
                formats)
            raise falcon.HTTPNotFound(title='404', description=msg)
        if requested_format == 'py':
            # emit a Python module that assigns the model to a 'model' name
            resp.body = ''.join([
                DWSUPPORT_CONFIG_HEADER,
                'model = {}'.format(get_model_string()),
                '\n',
            ])
            return
        if requested_format == 'json':
            resp.body = json.dumps({'model': dwsupport_model})
            return
def on_get(self, request, response, **kwargs):
    """ Handle HTTP GET: respond with ISO geographic metadata XML

    Falcon API provides: parameters embedded in URL via a keyword args
    dict, as well as convenience class variables falcon.HTTP_*
    """
    with warehouse.get_source_model_session() as current_model:
        # get Source dataset object, referenced in URL path
        sources = source.SourceUtil.get_list_of_data_sources(
            request.url, auth.get_user_id(request), current_model)
        dataset_id = source_parameters.get_requested_dataset_id(
            sources, request, response, kwargs)
        if warehouse.is_warehouse(dataset_id):
            msg = 'Metadata.xml not available for source ID: ' + dataset_id
            raise falcon.HTTPInvalidParam(msg, 'source')
        # locate the table whose qualified "project.name" matches the id
        dataset = next(
            (candidate for candidate in current_model['tables']
             if '{}.{}'.format(candidate['project'],
                               candidate['name']) == dataset_id),
            None)
        iso_xml = geographic_metadata(dataset, current_model)
        # return generated XML
        response.content_type = 'text/xml'
        response.body = iso_xml
def on_get(self, request, resp, **kwargs):
    """ Handle HTTP GET: publish the list of variables for a dataset

    Falcon request provides: parameters embedded in URL via a keyword
    args dict, as well as convenience class variables falcon.HTTP_*
    """
    api_config = loader.get_api_config()
    request_url = ResourceUtil.get_request_url(request, api_config)
    with warehouse.get_source_model_session() as model:
        sources = source.SourceUtil.get_list_of_data_sources(
            request_url, auth.get_user_id(request), model)
        str_dataset_id = source_parameters.get_requested_dataset_id(
            sources, request, resp, kwargs)
        dict_variables = ResourceUtil.get_self_dict(request_url)
        try:
            dwsupport_variables = get_list_of_variables(str_dataset_id)
        except NotImplementedError as e:
            # error; not a warehouse request & Dataset does not match
            # the requested ID
            logging.exception(e)
            raise falcon.HTTPError(falcon.HTTP_NOT_IMPLEMENTED,
                                   'Not Implemented', str(e))
        # format the DWSupport info, for publishing
        tables = model['tables']
        associations = model['associations']
        # alphabetize the fields by their web identifier
        web_variables = sorted(
            (to_web_variable(v, tables, associations)
             for v in dwsupport_variables),
            key=lambda v: v['id'])
        dict_variables[str_route] = web_variables
        resp.body = json.dumps(dict_variables, indent='\t')
def on_get(self, request, resp, **kwargs):
    """ direct user to POST the login details """
    session_user = auth.get_user_id(request)
    with warehouse.get_source_model_session() as dwsupport_model:
        if not management_auth.is_management_permitted(session_user,
                                                       dwsupport_model):
            raise falcon.HTTPUnauthorized(
                title='401',
                description='Warehouse management not authorized')
        # authorized, but GET is still the wrong verb for this endpoint
        raise falcon.HTTPMethodNotAllowed(
            self.allowed_methods,
            description="GET method not allowed, Please POST new table parameters to 'copy'")
def get_list_of_variables(str_dataset_id):
    """ Returns a list of dicts, representing all of source's available variables

    Keyword Parameters:
    str_dataset_id -- String, representing API id for the dataset

    Exceptions:
    falcon.HTTPNotFound -- no ETL-configured or db-configured source
      matches str_dataset_id
    NotImplementedError -- matched db table is not a supported type

    Improvements over original: `not in` membership idiom, elif chain
    for the mutually-exclusive table types, removed redundant
    initialization of variables_by_field.
    """
    if warehouse.is_warehouse(str_dataset_id):
        return warehouse.get_list_of_warehouse_variables()
    list_variables = []
    # first, check the ETL-configured sources
    for dict_source in loader.get_list_of_etl_dicts():
        if dict_source['id'] == str_dataset_id:
            # retrieve & decode the configured list of fields+types
            str_field_types_json = dict_source['python_types']
            dict_field_types = json.loads(str_field_types_json)
            # add the field names, to our list
            list_variables.extend(dict_field_types.keys())
            return list_variables
    # if loop did not return, continue search through db-configured sources
    with warehouse.get_source_model_session() as current_model:
        # break dataset identifier down into project/source substrings
        project_name, source_name = str_dataset_id.split('.')
        source_tables = warehouse.get_source_tables()
        for source_table in source_tables:
            if source_table['name'] != source_name:
                continue
            source_type = source_table['type']
            if source_type not in ('fact', 'dimension', 'dimension role'):
                #TODO: Make exception into a locally defined class
                raise NotImplementedError(
                    'no method to list variables for {} tables: {}'.format(
                        source_type, source_name))
            if source_type == 'fact':
                variables_by_field, _ = warehouse.get_fact_variables(
                    source_table, current_model)
            elif source_type == 'dimension':
                # retrieve the fields-to-types mapping
                variables_by_field = warehouse.get_variables(source_table)
            else:  # 'dimension role'
                # retrieve aliased versions, of underlying dimension's mapping
                variables_by_field = warehouse.get_role_variables(
                    source_table)
            # add the variable dicts, to our list
            list_variables.extend(variables_by_field.values())
            return list_variables
        else:
            # loop completed without finding a matching db source
            str_msg = 'Unable to list variables, source id {} not found.'
            raise falcon.HTTPNotFound(
                description=str_msg.format(str_dataset_id))
def _build_repo_if_missing(self):
    """ utility method to build repo if one does not yet exist

    Exceptions:
    TempRepoLocked -- raised when PyCSW repo is under construction by
      another thread or process & is not available for this call.
    """
    try:
        # probe for the repo file; opening it is the existence check
        probe = open(self._get_repo_path())
    except FileNotFoundError:
        # repo seems to be missing, build new repo & respond
        with warehouse.get_source_model_session() as dwsupport_model:
            self.build_repo(dwsupport_model)
    else:
        probe.close()
def on_get(self, request, resp):
    """ Falcon resource method, for HTTP request method: GET

    Falcon request provides a request.url convenience instance variable
    """
    api_config = api.config_loader.get_api_config()
    request_url = SourceUtil.get_request_url(request, api_config)
    with warehouse.get_source_model_session() as model:
        user_id = auth.get_user_id(request)
        sources = SourceUtil.get_list_of_data_sources(
            request_url, user_id, model)
        # Build a dict, representing the Source RESTful entity/endpoint
        source_entity = SourceUtil.get_source(sources, request_url)
        resp.body = json.dumps(source_entity, indent='\t')
def copy_table(source_id, new_project_name, new_table_name,
               new_custom_id_by_old_ids):
    """ Add table & related objects to dwsupport, copied from source_id Fact

    Returns 5-tuple, representing new Table DTO and lists of new
    association DTOs, variable DTOs, variable_custom_identifier DTOs &
    query DTOs

    Keyword Parameters:
    source_id -- String, identifying the project Fact table to copy
    new_project_name -- String, representing project name copy will belong to
    new_table_name -- String, representing name for the new table
    new_custom_id_by_old_ids -- Dict, representing new custom variable IDs,
      mapped by the existing custom IDs they're replacing

    Exceptions:
    CopyTableUnsupportedTableType -- unsupported source_id
    CopyTableDuplicateCopyName -- new_table_name already exists
    CopyTableMissingVariableCustomIdentifiers -- missing values in
      new_custom_id_by_old_ids
    CopyTableNonuniqueVariableCustomIdentifiers -- values in
      new_custom_id_by_old_ids are not globally unique
    """
    with warehouse.get_source_model_session() as dwsupport_model:
        # generate DTOs for the copy
        (table_dto, association_dtos, variable_dtos,
         custom_id_dtos, query_dtos) = _copy_fact_table(
            source_id,
            new_project_name,
            new_table_name,
            new_custom_id_by_old_ids,
            dwsupport_model)
        # persist the DTOs
        connection_func = util.get_dwsupport_connection
        dto.table.save([table_dto], connection_func=connection_func)
        dto.association.save(association_dtos,
                             connection_func=connection_func)
        dto.variable.save(variable_dtos, connection_func=connection_func)
        dto.variable_custom_identifier.save(
            custom_id_dtos, connection_func=connection_func)
        dto.query.save(query_dtos, connection_func=connection_func)
        # provide caller with a copy, of the new DTO info
        return (table_dto, association_dtos, variable_dtos,
                custom_id_dtos, query_dtos)
def update_csw_repo():
    """ function to build new PyCSW repository then schedule a rebuild """
    # construct a new PyCSW sqlite store
    bogus_wsgi_environ = {'bogus': 'value'}  # build_repo doesn't use environ
    try:
        with warehouse.get_source_model_session() as dwsupport_model:
            responder = CswResponder(bogus_wsgi_environ,
                                     api_temp_subfolder_name)
            responder.build_repo(dwsupport_model)
    except TempRepoLocked as e:
        # another thread/process is already rebuilding; skip this pass
        logger.warning(
            "Skipping CSW repo update, an update is already in-progress")
        logger.debug(e.__cause__, exc_info=True)
    # schedule the next update
    schedule_update()
def on_get(self, request, resp, **kwargs):
    """ return JSON representing referenced table's associated columns """
    session_user = auth.get_user_id(request)
    with warehouse.get_source_model_session() as dwsupport_model:
        if not management_auth.is_management_permitted(session_user,
                                                       dwsupport_model):
            raise falcon.HTTPUnauthorized(
                title='401',
                description='Warehouse management not authorized')
        # authorized; resolve the dataset referenced in the URL
        sources = source.SourceUtil.get_list_of_data_sources(
            request.url, auth.get_user_id(request), dwsupport_model)
        requested_source_id = selection.get_requested_dataset_id(
            sources, request, resp, kwargs)
        rows = get_variable_identifier_queries_dicts(
            requested_source_id, dwsupport_model)
        resp.body = json.dumps({'variables': rows}, indent='\t')
def get_result_from_db(str_source_id, variables=None, filters=None,
                       columns=None, empty_cell_dimensions=None,
                       user_id=None):
    """ returns list of db result rows, w/ 1x 'header' row prepended

    Keyword Parameters:
    str_source_id -- String, representing requested dataset's API id
    variables -- list of requested variable names
    filters -- list of specified filter expression strings
    columns -- list of names for requested dimension variables to be
      pivoted as additional columns, for all requested value variables
    empty_cell_dimensions -- list of Strings representing Dimension
      tables (or OLAP-Roles) which are to be OUTER JOINED to produce
      empty Fact value cells for all Dimensional values not found in
      the fact.
    user_id -- String, representing an authenticated User principal

    Exceptions:
    FilterVariableError -- filters variable not found in header
    NoSourceException -- raised when no dataset matches str_source_id
    NotAuthorizedException -- user_id not authorized to select data
      from the specified source.

    Fixes over original: the no-match branch raised
    NoSourceException(e) where 'e' was an undefined name (NameError);
    mutable default arguments replaced with None sentinels.
    """
    # avoid shared mutable default arguments
    variables = [] if variables is None else variables
    filters = [] if filters is None else filters
    columns = [] if columns is None else columns
    empty_cell_dimensions = ([] if empty_cell_dimensions is None
                             else empty_cell_dimensions)
    # generate query
    tables = warehouse.get_source_tables()
    # break dataset identifier down into project/source substrings
    project_name, source_name = str_source_id.split('.')
    for warehouse_table in tables:
        if warehouse_table['name'] == source_name:
            # get connection
            connection = util._get_source_connection(warehouse.dict_source)
            with warehouse.get_source_model_session() as cached_model:
                if warehouse_table['confidential']:
                    # Attempt to obtain a sensitive connection IF user
                    # is authorized
                    if not auth.is_select_permitted(user_id,
                                                    warehouse_table,
                                                    cached_model):
                        raise NotAuthorizedException()
                    connection.close()
                    connection = util._get_source_connection(
                        {'id': 'Fake .ini source',
                         'db_file': 'db_dwsensitive.ini'})
                # retrieve filter info (variable dicts keyed by field)
                if warehouse_table['type'] == 'fact':
                    variable_by_field, _ = warehouse.get_fact_variables(
                        warehouse_table, cached_model)
                elif warehouse_table['type'] == 'dimension':
                    variable_by_field = warehouse.get_variables(
                        warehouse_table)
                elif warehouse_table['type'] == 'dimension role':
                    variable_by_field = warehouse.get_role_variables(
                        warehouse_table)
                python_types = {}
                for field, var in variable_by_field.items():
                    python_types[field] = var['python_type']
                json_python_types = json.dumps(python_types)
                # get sql & binds
                try:
                    table_type.validate(warehouse_table['type'])
                except table_type.ValidateUnexpectedValue as e:
                    raise NotImplementedError(
                        'No SQL Generation method, for type: {}'.format(
                            warehouse_table)
                    ) from e  #TODO: make this into a local class
                sql_with_filters, binds = warehouse.get_sql_filtered(
                    warehouse_table, json_python_types, filters,
                    empty_cell_dimensions)
            db_config_file_name = warehouse.dict_source['db_file']
            break  # source found, exit!
    else:
        # fix: original raised NoSourceException(e) with undefined 'e'
        raise NoSourceException(str_source_id)
    if len(binds) > 0:
        result = connection.execution_options(stream_results=True).execute(
            sql_with_filters, binds)
    else:
        result = connection.execution_options(
            stream_results=True).execute(sql_with_filters)
    # compose results list
    result_generator = database_row_generator(result, connection)
    subset_generator = parameters.get_result_subset(result_generator,
                                                    variables)
    if columns:
        # pivot, i.e.: replace 'columns' fields & all measured value
        # fields with new measured-value breakdowns for the 'columns'
        # field values.
        all_variables = warehouse.get_source_variables()
        fact_variables = [v for v in all_variables
                          if v['table'] == source_name]
        return pivot.get_result(subset_generator, columns, fact_variables)
    # else, no pivot needed - just return
    return subset_generator
def on_post(self, request, resp, **kwargs):
    """ Create/update table column, associated with referenced source_id

    Fix over original: the "No changes made" message was unreachable
    because `len(changes)` counted the dict's two fixed keys
    ('add'/'update'), never the recorded changes themselves.

    TODO: add a JSON response body, compatible with DataTables Editor
    TODO2: improve documentation, unit test coverage
    """
    session_user = auth.get_user_id(request)
    with warehouse.get_source_model_session() as dwsupport_model:
        if not management_auth.is_management_permitted(
                session_user, dwsupport_model):
            msg = 'Warehouse management not authorized'
            raise falcon.HTTPUnauthorized(title='401', description=msg)
        #else
        sources = source.SourceUtil.get_list_of_data_sources(
            request.url, auth.get_user_id(request), dwsupport_model)
        requested_source_id = selection.get_requested_dataset_id(
            sources, request, resp, kwargs)
        source_project, source_table = requested_source_id.split(
            '.')  #TODO: refactor this
        # Add DTO variable (if needed)
        get_func = util.get_dwsupport_connection
        if request.params['action'] == 'create':
            table_name = request.params['data[0][table]']
            column_name = request.params['data[0][column]']
            python_type = request.params['data[0][python_type]']
            column_title = request.params['data[0][title]']
            variable_dto = {
                'table': table_name, 'column': column_name,
                'title': column_title, 'python_type': python_type,
                'physical_type': None, 'units': None,
                'max_length': None, 'precision': None,
                'allowed_values': None, 'description': None}
            dto.variable.save([variable_dto], connection_func=get_func)
        # get new default Queries this column should be in
        ## DataTables editor returns URLEncoded table,column defaults
        ## in format: data[{table_name}.{column_name}][defaults] = '{query1}(,{queryN})'
        key_prefix = 'data['
        key_suffix = '][column]'
        column_key_generator = (key for key in request.params
                                if key.endswith(key_suffix))
        column_key = next(column_key_generator)
        # get column details
        table_dot_column_plus_suffix = column_key[len(key_prefix):]
        table_dot_column = table_dot_column_plus_suffix[
            :len(key_suffix) * -1]
        table_name, column_name = table_dot_column.split('.')
        # get query details
        defaults_key = 'data[{}.{}][defaults]'.format(
            table_name, column_name)
        try:
            defaults_text = request.params[defaults_key]
            default_queries = {
                query.strip().lower()  #parse text
                for query in defaults_text.split(',')}
        except KeyError as defaults_empty_or_missing:
            default_queries = set()
        query_variable_table = table_name  # get table_name for a role
        association_key = 'data[{}.{}][association]'.format(
            table_name, column_name)
        try:
            association_column = request.params[association_key]
            association_dto = next(
                (association
                 for association in dwsupport_model['associations']
                 if association['table'] == source_table
                 and association['column'] == association_column))
            query_variable_table = association_dto['parent']
        except KeyError as association_empty_or_missing:
            pass  # done; no applicable association is specified
        # update DTOs
        changes = {'add': [], 'update': []}  #track changes
        for query_name in default_queries:
            try:
                # add column to an existing query
                query_dto = next(
                    (query for query in dwsupport_model['queries']
                     if query['name'] == query_name
                     and query['table'] == source_table))
                try:
                    query_dto['variables'][table_name].append(column_name)
                except KeyError as new_table:
                    query_dto['variables'][table_name] = [column_name]
                dto.query.update_by_table_and_name(
                    source_table, query_name, query_dto,
                    connection_func=get_func)
                changes['update'].append(query_dto)
            except StopIteration as no_query_exists:
                # no such query yet; create it with just this column
                query_dto = {'name': query_name,
                             'table': source_table,
                             'variables': {table_name: [column_name]}}
                dto.query.save([query_dto], connection_func=get_func)
                changes['add'].append(query_dto)
        if default_queries == set():
            # remove column from any queries that currently include it
            for query_dto in (query
                              for query in dwsupport_model['queries']
                              if query['table'] == source_table):
                variable_tables = query_dto['variables'].keys()
                if (query_variable_table in variable_tables
                        and column_name in
                        query_dto['variables'][query_variable_table]):
                    query_dto['variables'][query_variable_table].remove(
                        column_name)
                    if len(query_dto['variables']
                           [query_variable_table]) == 0:
                        del query_dto['variables'][query_variable_table]
                    dto.query.update_by_table_and_name(
                        source_table, query_dto['name'], query_dto,
                        connection_func=get_func)
                    changes['update'].append(query_dto)
        # JSON response per https://editor.datatables.net/manual/server
        msg = None
        if not (changes['add'] or changes['update']):
            # fix: original tested len(changes)==0, which never held
            msg = "No changes made"
        resp.body = json.dumps({'data': [changes], "error": msg})
        return
def on_get(self, request, resp, **kwargs):
    """ Falcon resource method, for handling HTTP request GET method

    Selects data from the requested dataset, applies defaults, filters,
    pivoting & empty-cell handling, then streams the formatted result.

    Falcon request provides: parameters embedded in URL via a keyword
    args dict, as well as convenience class variables falcon.HTTP_*

    FIXME: remove module pylint:disable= & refactor this overlong code block!
    """
    # obtain logged in API user ID (if available)
    api_session_user = auth.get_user_id(request)
    # select data
    with warehouse.get_source_model_session() as dwsupport_model:
        sources = source.SourceUtil.get_list_of_data_sources(
            request.url, auth.get_user_id(request), dwsupport_model)
        str_dataset_id = get_requested_dataset_id(
            sources, request, resp, kwargs)
        list_variables_requested_source = variables.get_list_of_variables(
            str_dataset_id)
        # convert 'datasets' into a list of variables
        list_requested_datasets = parameters.get_requested_datasets(
            request)
        list_variables_from_datasets = []
        for str_id in list_requested_datasets:
            if str_dataset_id == str_id:
                # requested dataset IS the URL dataset; use its variables
                list_variables_from_datasets = list_variables_requested_source
                break
            if str_dataset_id == 'warehouse':
                #FIXME: refactor this into a source.warehouse function
                #obtain the 'warehouse' field aliases for each dataset
                list_source_variables = variables.get_list_of_variables(
                    str_id)
                for var in list_source_variables:
                    warehouse_utils = api.resources.source.warehouse.warehouse
                    str_alias = warehouse_utils.prefix_field_name(
                        var, str_id)
                    list_variables_from_datasets.append(str_alias)
            else:
                #error; not a warehouse request & Dataset does not match requested ID
                raise falcon.HTTPNotFound(
                    description="Unrecognized dataset: " + str_id)
        list_requested_variables = parameters.get_requested_variables(
            request)
        # add default variables, when none were explicitly requested
        if len(list_requested_variables) < 1:
            requested_default_query = parameters.get_list_requested_parameter(
                defaults.PARAMETER_NAME, request)
            try:
                default_variables = defaults.get_default_variables(
                    requested_default_query, str_dataset_id,
                    dwsupport_model)
            except defaults.UndefinedDefaultQuery as error:
                msg = ("Value {} is not defined for dataset: '{}'"
                       .format(error, str_dataset_id))
                raise falcon.HTTPInvalidParam(msg, defaults.PARAMETER_NAME)
            except defaults.AmbiguousDefaultQuery as error:
                msg = "More than one value was specified: {}".format(error)
                raise falcon.HTTPInvalidParam(msg, defaults.PARAMETER_NAME)
            except defaults.AmbiguousQueryHierarchy as error:
                raise falcon.HTTPBadRequest(  #TODO: add functional test coverage
                    title="Missing Parameter",
                    description=(
                        "Selection defaults not clear for"
                        " data source: '{}'."
                        " Selection must specify one or more 'variables='"
                        " selection parameters (or a 'defaults=' parameter"
                        " value from the following list: {})"
                    ).format(str_dataset_id, error))
            list_requested_variables.extend(default_variables)
        # add variables derived from 'datasets' param
        list_requested_variables.extend(list_variables_from_datasets)
        list_requested_filters = parameters.get_requested_filters(
            request)
        # process pivot columns parameter
        try:
            pivot_column_variables = parameters.get_requested_pivot_columns(
                request, str_dataset_id, dwsupport_model['tables'])
        except parameters.PivotVariableError as err:
            raise falcon.HTTPInvalidParam(
                msg=str(err),
                param_name=parameters.ReservedParameterNames.pivot_columns
            ) from err
        # process 'Empty_cells' parameter
        try:
            empty_cell_dimensions = parameters.get_requested_empty_cells(
                request, str_dataset_id, dwsupport_model['tables'],
                dwsupport_model['associations'])
        except (parameters.EmptyCellsSourceError,
                parameters.EmptyCellsDimensionError) as err:
            raise falcon.HTTPInvalidParam(
                msg=str(err),
                param_name=parameters.ReservedParameterNames.empty_cells
            ) from err
        # retrieve data (start_time records selection start, for formatters)
        start_time = datetime.now(pytz.timezone('US/Pacific'))
        try:
            result_generator = data.get_data(
                str_dataset_id, list_requested_variables,
                list_requested_filters, pivot_column_variables,
                empty_cell_dimensions, user_id=api_session_user)
        except sqlalchemy.exc.DatabaseError as err:
            raise falcon.HTTPInternalServerError(
                title='500', description="Please try again") from err
        except data.NoSourceException as err:
            raise falcon.HTTPNotFound(
                description=("Source '{}' dataset not found:"
                             " {}").format(str_dataset_id, err)) from err
        except parameters.FilterVariableError as err:
            #TODO: the bad HTTP parameter not always 'filters',sometimes a user-defined param (implicit-filter)
            #TODO: perhaps parameters should raise two different Exceptions?
            raise falcon.HTTPInvalidParam(str(err), 'filters') from err
        except data.NotAuthorizedException as error:
            raise falcon.HTTPUnauthorized(
                title='401',
                description=("Selection from sensitive data source '{}'"
                             " not authorized").format(str_dataset_id)
            ) from error
        # format & stream the result
        str_format_type = get_requested_format_type(kwargs)
        resp.content_type = FormatUtil.get_http_content_type(
            str_format_type)
        for data_source in sources:
            if data_source['id'] == str_dataset_id:
                formatter = FormatUtil(str_format_type, data_source,
                                       request, start_time)
                result_stream = formatter.format(result_generator)
                break
        # chunk factor 4 chosen empirically — see original timing notes;
        # NOTE(review): presumably a throughput/latency tradeoff, confirm
        chunked_stream = streaming.biggerchunks_stream(result_stream, 4)
        if str_format_type == 'xlsx':
            byte_stream = chunked_stream  # already bytes
        else:
            encoding = 'utf-8'
            if resp.content_type == 'text/csv':
                # BOM-prefixed UTF-8, so spreadsheet apps detect encoding
                encoding = 'utf-8-sig'
            byte_stream = codecs.iterencode(chunked_stream, encoding)
        resp.stream = byte_stream  # stream encoded content to the client
def on_post(self, request, resp, **kwargs):
    """ Make copy of referenced DWSupport table, with specified changes

    Fixes over original: the JSON-decode handler caught
    'json.json.scanner.JSONDecodeError' — a nonexistent attribute path
    that raised AttributeError instead of handling the bad parameter;
    the stdlib exception is json.JSONDecodeError. Also replaced
    `type(x) != dict` with an isinstance check.
    """
    session_user = auth.get_user_id(request)
    with warehouse.get_source_model_session() as dwsupport_model:
        if not management_auth.is_management_permitted(session_user,
                                                       dwsupport_model):
            msg = 'Warehouse management not authorized'
            raise falcon.HTTPUnauthorized(title='401', description=msg)
        #else
        sources = source.SourceUtil.get_list_of_data_sources(
            request.url, auth.get_user_id(request), dwsupport_model)
        requested_source_id = selection.get_requested_dataset_id(
            sources, request, resp, kwargs)
        # required POST parameters
        try:
            new_table = request.params['name']
            new_project = request.params['project-name']
            new_variable_custom_identifiers = request.params[
                'variable-custom-identifiers']
        except KeyError as error:
            raise falcon.HTTPBadRequest(  #TODO: add functional test coverage
                title="Missing Parameter",
                description=(
                    "Unable to make copy of"
                    " data source: '{}'."
                    " (Copy request must specify HTTP POST parameter: {})"
                ).format(requested_source_id, error))
        try:
            new_custom_ids_by_old_id = json.loads(
                new_variable_custom_identifiers)
        except json.JSONDecodeError as e:  # fix: was json.json.scanner.*
            msg = ("Unable to make copy of"
                   " data source: '{}'."
                   " (Parameter is not valid JSON object: {})"
                   ).format(requested_source_id, e)
            raise falcon.HTTPInvalidParam(
                msg, 'variable-custom-identifiers')
        if not isinstance(new_custom_ids_by_old_id, dict):
            msg = ("Unable to make copy of"
                   " data source: '{}'."
                   ' Parameter must be a JSON object: {{"existing_table_custom_variable_id": "new_id"}}'
                   ).format(requested_source_id)
            raise falcon.HTTPInvalidParam(
                msg, 'variable-custom-identifiers')
        try:
            new_dto_tuple = configure.copy_table(
                requested_source_id, new_project, new_table,
                new_custom_ids_by_old_id)
            new_table, new_associations, new_variables, \
                new_variable_custom_identifiers, new_queries = new_dto_tuple
            resp.body = json.dumps(
                {'table': new_table,
                 'associations': new_associations,
                 'variables': new_variables,
                 'variable_custom_identifiers':
                     new_variable_custom_identifiers,
                 'queries': new_queries},
                indent='\t')
            return
        except configure.CopyTableUnsupportedTableType as e:
            raise falcon.HTTPBadRequest(  #TODO: add functional test coverage
                title="Bad Path",
                description=("Copy only supported for tables of type"
                             " 'fact'. (The '{}' data source in URL is"
                             " type: '{}')"
                             ).format(requested_source_id, e))
        except configure.CopyTableDuplicateCopyName as e:
            msg = ("Unable to make copy of"
                   " data source: '{}'."
                   " (Please specify a new table name, a table with"
                   " the provided name already exists: {})"
                   ).format(requested_source_id, e)
            raise falcon.HTTPInvalidParam(msg, 'name')
        except configure.CopyTableNonuniqueVariableCustomIdentifiers as e:
            msg = ("Unable to make copy of"
                   " data source: '{}'."
                   " (The following new IDs must not duplicate any other"
                   " variable custom IDs: {})"
                   ).format(requested_source_id, e)
            raise falcon.HTTPInvalidParam(
                msg, 'variable-custom-identifiers')
        except configure.CopyTableMissingVariableCustomIdentifiers as e:
            msg = ("Unable to make copy of"
                   " data source: '{}'."
                   " (Copy request parameter must include new, unique"
                   " IDs for these existing variable custom IDs: {})"
                   ).format(requested_source_id, e)
            raise falcon.HTTPInvalidParam(
                msg, 'variable-custom-identifiers')