def resolve_remote_url(self):
    """ Fill in the remote url template from instance attributes

    Collects the bracketed argument names found in the remote url, looks
    up any same-named attributes set on this instance, and hands those
    values to the remote so it can resolve its url.  Nothing happens when
    no remote is set, when the remote has no url, or when the url has no
    bracketed names.

    Raises
    ------
    BrainError
        when the remote raises a KeyError while resolving the url
    BrainError
        when the remote still reports an invalid url after resolution
    """
    remote = self.remote
    if not remote or not remote.url:
        return

    names = remote.extract_url_brackets()
    if not names:
        return

    # only names that exist as attributes on the instance are passed along
    params = {name: getattr(self, name) for name in names if hasattr(self, name)}

    try:
        self.remote.resolve_url(params)
    except KeyError as exc:
        raise BrainError(
            f'Cannot resolve remote url. Missing keys {exc} found.'
        ) from exc

    if not self.remote.has_valid_url:
        raise BrainError('Still invalid url')
def _determine_inputs(self, data_input: str) -> None: """ Determines what inputs to use in the decision tree. Parameters ---------- data_input : str The input string to attempt to parse into a filename or object id """ parsed_input = None if data_input: assert self.filename is None and self.objectid is None, \ 'if input is set, filename and objectid cannot be set.' if not isinstance(data_input, (six.string_types, pathlib.Path)): raise TypeError('input must be a string or pathlib.Path') # parse the input data into either a filename or objectid parsed_input = self._parse_input(data_input) if not parsed_input: self.filename = data_input else: assert isinstance( parsed_input, dict), 'return value of _parse_input must be a dict' self.filename = parsed_input.get('filename', None) self.objectid = parsed_input.get('objectid', None) # ensure either filename or objectid is specified if self.filename is None and self.objectid is None: raise BrainError( 'no inputs defined. filename and objectid are both None') # convert filename to a pathlib.Path and resolve a relative name # not using pathlib.resolve to preserve symlinks if self.filename: self.filename = pathlib.Path(os.path.abspath(self.filename)) # issue a warning if the release is not indicated in the filename; possible mismatch if self.release.lower() not in self.filename.as_posix(): warnings.warn( 'Your filename may not match the release indicated. Path parameters ' 'may not be extracted properly. Try setting the release to match the ' 'known file version.') # attempt to update the access path parameters from the filename or parsed data input self._update_access_params(params=parsed_input) # check for any misaligments and misassignments if self.filename: self.objectid = None if self.mode == 'remote': raise BrainError('filename not allowed in remote mode.') assert self.filename.exists, \ 'filename {} does not exist.'.format(str(self.filename)) elif self.objectid: assert not self.filename, 'invalid set of inputs.'
def __init__(self, data_input: str = None, filename: str = None, objectid: str = None, mode: str = None, release: str = None, download: bool = None, ignore_db: bool = None, use_db: db_type = None, version: str = None, use_api: api_type = None, async_client: bool = None) -> None:
    """ Initialize the object, resolve its data origin and load the data

    Validates the "work" version settings, delegates input handling to the
    MMA initializer (``self._mma.__init__``), optionally sets up an
    `ApiHandler`, and finally calls the loader that matches
    ``self.data_origin``.

    Parameters
    ----------
    data_input : str, optional
        Forwarded unchanged to the MMA initializer
    filename : str, optional
        Forwarded unchanged to the MMA initializer
    objectid : str, optional
        Forwarded unchanged to the MMA initializer
    mode : str, optional
        Forwarded unchanged to the MMA initializer
    release : str, optional
        Forwarded to the MMA initializer; for the version check only, it
        falls back to ``config.release`` when not given
    download : bool, optional
        Forwarded unchanged to the MMA initializer
    ignore_db : bool, optional
        Forwarded unchanged to the MMA initializer
    use_db : db_type, optional
        Forwarded to the MMA initializer; falls back to ``self._db``
    version : str, optional
        When given, passed to ``self.set_work_version``; only allowed with
        a "work" release
    use_api : api_type, optional
        The API used to build the `ApiHandler`; falls back to ``self._api``
        and then ``config.apis.profile``
    async_client : bool, optional
        Passed to the `ApiHandler`; falls back to ``self.async_client``

    Raises
    ------
    BrainError
        when ``version`` is given but the release is not "work"
    BrainError
        when the release is "work", no ``version`` is given and
        ``self._version`` is empty
    """
    # set a version for sdsswork data
    checked_release = release or config.release
    if version:
        self.set_work_version(version)
        if checked_release.lower() != 'work':
            raise BrainError(
                'version is only used for "work" data. '
                'Please set the input or config release to "WORK"')
    else:
        if not self._version and checked_release.lower() == 'work':
            raise BrainError(
                'You are using a "work" release but have no work versions set! '
                'Try setting a global "work_version" dict or specify a "version" '
                'input!')

    # initialize the MMA
    self._mma.__init__(self, data_input=data_input, filename=filename, objectid=objectid, mode=mode, release=release, download=download, ignore_db=ignore_db, use_db=use_db or self._db)

    # set up any ApiHandler
    remote = use_api or self._api or config.apis.profile
    # NOTE(review): an explicit async_client=False also falls back to
    # self.async_client because of the ``or`` - confirm this is intended
    async_client = async_client or self.async_client
    if remote:
        self._api = ApiHandler(remote, async_client=async_client)
        self._check_remote_url()

    # load the data
    if self.data_origin == 'file':
        self._load_object_from_file()
    elif self.data_origin == 'db':
        self._load_object_from_db()
    elif self.data_origin == 'api':
        self._load_object_from_api()
def load_schema(self, schema: str):
    """ Load an sdssdb schema module

    Loads an input sdssdb schema module name from the currently loaded
    database and into the handler.  The input name can be dot-qualified as
    "[orm].[database].[schema]" and it will attempt to find and load the
    correct module.  For example ``targetdb``, ``sdss5db.targetdb``,
    ``peewee.sdss5db.targetdb`` are all valid names.  A name with three or
    more dots is imported exactly as given.

    On success sets ``self.models`` to the imported module, ``self.schema``
    to the module name with its first two dotted parts removed, and resets
    ``self.model`` to None.

    Parameters
    ----------
    schema : str
        The name of an sdssdb schema module

    Raises
    ------
    TypeError
        when the input is not a string
    BrainError
        when no database connection is present
    BrainError
        when no schema module is found by importlib.import_module
    """
    # isinstance also accepts str subclasses, which the exact type check rejected
    if not isinstance(schema, str):
        raise TypeError(f'Input {schema} must be a string.')

    if not self.db:
        raise BrainError(f'No db present. Cannot load schema {schema}.')

    orm = 'sqlalchemy' if self.orm == 'sqla' else 'peewee'
    dbname = self.db.dbname

    # the number of dots tells how much of the qualified name is missing
    prefixes = {
        0: f'sdssdb.{orm}.{dbname}.',
        1: f'sdssdb.{orm}.',
        2: 'sdssdb.',
    }
    modname = prefixes.get(schema.count('.'), '') + schema

    try:
        models = importlib.import_module(modname)
    except ModuleNotFoundError as e:
        raise BrainError(f'No module found matching {modname}') from e

    self.models = models
    self.schema = models.__name__.split('.', 2)[-1]
    self.model = None
def set_api(self, use_api: Union[str, Type[ApiProfile]], domain: str = None, test: bool = None) -> None:
    """ Set the API profile to use in the http client

    Sets the API profile to use for all remote http requests.  When
    ``use_api`` is empty, falls back to ``config.apis.profile``.  A string
    name is resolved through the API manager (``apim.set_profile``).

    Parameters
    ----------
    use_api : Union[str, Type[ApiProfile]]
        A API name or `~sdss_brain.api.manager.ApiProfile`
    domain : str, optional
        The domain name to use for the API, by default None
    test : bool, optional
        If True, toggles the development API, by default None

    Raises
    ------
    BrainError
        when no API profile is set
    """
    api = use_api or config.apis.profile

    if isinstance(api, ApiProfile):
        self.api = api
    elif isinstance(api, str):
        # isinstance so that str subclasses are treated as profile names too
        apim.set_profile(api, test=test, domain=domain)
        self.api = apim.profile
    else:
        self.api = None

    if not self.api:
        raise BrainError(
            'No API is set. Set one either by specifying "use_api" on input or '
            'setting one on the global config.')
def set_url(self, route: str) -> None:
    """ Sets the url route to use on the http client

    A route beginning with "http" is stored as-is.  Any other non-empty
    route is treated as a segment and expanded into a full url with the
    ``construct_route`` method of the current API profile.  An empty route
    leaves the current url untouched.

    Parameters
    ----------
    route : str
        A url or route segment

    Raises
    ------
    BrainError
        when a route segment is given but no API profile is set
    """
    if not route:
        return

    if not route.startswith('http'):
        # a bare segment needs an API profile to supply the base url
        if not self.api:
            raise BrainError(
                f'No API is set. Cannot construct a full url for {route}')
        self.url = self.api.construct_route(route)
        log.debug(
            f'Building url from input route {route} and selected API {self.api}.'
        )
        return

    self.url = route
    log.debug(f'Using fully qualified url {route}.')
def load_model(self, model: str):
    """ Loads an ORM model

    Looks up the named attribute on the currently loaded schema module
    (``self.models``) and stores it as ``self.model``.

    Parameters
    ----------
    model : str
        The name of an ORM model to load

    Raises
    ------
    BrainError
        when no valid schema is set
    AttributeError
        when no (truthy) model of that name is found within the loaded schema
    """
    schema_module = self.models
    if not schema_module:
        raise BrainError('No valid schema set containing ORM models.')

    found = getattr(schema_module, model, None)
    if found:
        self.model = found
        return

    raise AttributeError(
        f'schema {self.schema} does not have model {model}')
def send_post_request(url: str, data: dict = None) -> dict:
    """ A simple httpx post request

    Sends a standalone ``httpx.post`` to the given url with an optional
    data payload and returns the decoded json body.

    Parameters
    ----------
    url : str
        The url to send the request to
    data : dict, optional
        Input data to send along with the request, by default None

    Returns
    -------
    dict
        Extracted response data from response.json()

    Raises
    ------
    BrainError
        when httpx raises a RequestError while sending the request
    """
    try:
        response = httpx.post(url, data=data)
    except httpx.RequestError as exc:
        raise BrainError(
            f'An error occurred requesting {exc.request.url!r}') from exc

    # problem status codes are raised by httpx itself and are not wrapped
    response.raise_for_status()
    return response.json()
def load_fits_file(filename: str) -> fits.HDUList:
    ''' Load a FITS file

    Opens and loads a FITS file with astropy.io.fits.

    Parameters
    ----------
    filename : str
        A FITS file to open

    Returns
    -------
    hdulist : `~astropy.io.fits.HDUList`
        an Astropy HDUList

    Raises
    ------
    FileNotFoundError
        when the input path does not exist or is not a regular file
    AssertionError
        when ".fits" is not among the filename suffixes
    BrainError
        when the file cannot be opened
    '''
    path = pathlib.Path(filename)

    # ``not path.exists() and path.is_file()`` could never be true; is_file()
    # is already False for a missing path, so one check covers both cases
    if not path.is_file():
        raise FileNotFoundError('input filename must exist and be a file')

    assert '.fits' in path.suffixes, 'filename is not a valid FITS file'

    try:
        hdulist = fits.open(path)
    except OSError as err:  # IOError is an alias of OSError
        log.error(f'Cannot open FITS file {filename}: {err}')
        raise BrainError(
            f'Failed to open FITS files {filename}: {err}') from err

    return hdulist
def load_url(self, route: str) -> None:
    """ Loads a url constructed from an API base url

    Construct a new url, given the input route segment, for a given API
    profile.  The result is stored on ``self.url`` and also pushed to the
    client through ``self.client.set_url``.

    Parameters
    ----------
    route : str
        The url path route segment

    Raises
    ------
    TypeError
        when input route is not a string
    BrainError
        when no API profile is set on the handler
    """
    # isinstance also accepts str subclasses, which the exact type check rejected
    if not isinstance(route, str):
        raise TypeError('Input route must be a string.')

    if not self.api:
        raise BrainError('No API profile is set. Cannot construct a url.')

    self.url = self.api.construct_route(route)
    self.client.set_url(self.url)
def read_netrc(self, host: str) -> tuple:
    """ Read the netrc file for a given host

    Returns the login and password stored in the netrc file for the given
    machine.  Both values are plain text: do not write them anywhere that
    is transparent or publicly visible.

    Parameters
    ----------
    host : str
        The netrc machine name

    Returns
    -------
    tuple
        Plain text netrc username, password

    Raises
    ------
    BrainError
        when netrc file fails to pass checks
    ValueError
        when input host is not valid
    """
    if not self.check_netrc():
        raise BrainError('netrc did not pass checks. Cannot read!')

    host_ok = self.check_host(host)
    if not host_ok:
        raise ValueError(f'{host} must be a valid host in the netrc')

    # authenticators gives (login, account, password); the account is unused
    credentials = netrc.netrc(self.path).authenticators(host)
    username, _account, secret = credentials
    return username, secret
def _do_remote(self) -> None: """ Check if remote connection is possible.""" if self.filename: raise BrainError('filename not allowed in remote mode.') else: self.mode = 'remote' self.data_origin = 'api'
def release(self, value: str) -> None:
    """ Set the release, after validating it and replanting the tree

    The value is upper-cased, checked against ``self._allowed_releases``,
    and, for any release without "dr" in its name, the current user must
    be validated.  The tree is then replanted ("sdsswork" for the work
    release, otherwise the lower-cased release name).

    Parameters
    ----------
    value : str
        The release name to set

    Raises
    ------
    BrainError
        when the release is not one of the allowed releases
    BrainError
        when a non-"dr" release is requested by an unvalidated user
    """
    release_name = value.upper()
    if release_name not in self._allowed_releases:
        raise BrainError(
            f'trying to set an invalid release version {release_name}. '
            f'Valid releases are: {", ".join(self._allowed_releases)}')

    tree_name = release_name.lower()

    # if work release or IPL, check for a validated user
    is_public = 'dr' in tree_name
    if not is_public and not self.user.validated:
        raise BrainError(
            f'User {self.user} is not validated. Can only access public data. '
            'Cannot access "work" data or internal SDSS releases.  Consider '
            'validating the user, or check your SDSS netrc credentials.')

    # replant the tree
    tree.replant_tree('sdsswork' if tree_name == 'work' else tree_name)

    self._release = release_name
def check_netrc(self) -> bool:
    """ Validates the netrc file

    Checks that the netrc file exists, has 600 permissions and parses
    correctly, then records in ``self._valid_hosts`` which of
    ``self.allowed_hosts`` are present in it.  A warning is issued when
    "data.sdss.org" or "api.sdss.org" is among the missing allowed hosts.

    Returns
    -------
    bool
        True if at least one entry in ``self._valid_hosts`` is valid

    Raises
    ------
    BrainError
        when the netrc file does not exist
    BrainError
        when the netrc file does not have 600 permissions
    BrainError
        when the netrc file cannot be parsed
    """
    # check for file existence
    if not self.path.is_file():
        raise BrainError(f'No .netrc file found at {self.path}!')

    # check for correct permissions (owner/group/other bits only)
    if self.path.stat().st_mode & 0o777 != 0o600:
        raise BrainError(
            'Your .netrc file does not have 600 permissions. Please fix it by '
            'running chmod 600 on it. Authentication will not work with '
            'permissions different from 600.')

    # read the netrc file
    try:
        netfile = netrc.netrc(self.path)
    except netrc.NetrcParseError as nerr:
        raise BrainError(
            f'Your netrc file was not parsed correctly. Error: {nerr}'
        ) from nerr

    # check the netrc file has the allowed SDSS host machines
    nethosts = netfile.hosts.keys()
    missing = []
    for host in self.allowed_hosts:
        present = host in nethosts
        self._valid_hosts[host] = present
        if not present:
            missing.append(host)

    # check that the required domains are included; sorted so that the
    # warning text does not depend on set iteration order
    missing_required = sorted({'data.sdss.org', 'api.sdss.org'} & set(missing))
    if missing_required:
        warnings.warn(
            f"Hosts {', '.join(missing_required)} not found in netrc.  "
            "You will not be able to remotely access SDSS data.", UserWarning)

    # validate if any are good
    return any(self._valid_hosts.values())
def _do_local(self) -> None: """ Check if it's possible to load the data locally.""" if self.filename: # check if the file exists locally if self.filename.exists(): self.mode = 'local' self.data_origin = 'file' else: raise BrainError('input file {0} not found'.format( self.filename)) elif self.objectid: # prioritize a database unless explicitly set to ignore if self.db and self.db.connected and not self._ignore_db: self.mode = 'local' self.data_origin = 'db' else: # retrieve the full local sdss_access path fullpath = self.get_full_path() if fullpath and os.path.exists(fullpath): self.mode = 'local' self.filename = pathlib.Path(fullpath) self.data_origin = 'file' else: # optionally download the file if self._forcedownload: self.download() self.data_origin = 'file' else: raise BrainError('failed to retrieve data using ' 'input parameters.')
def _check_htpass(self) -> None:
    """ Check existence of the htpasswd file

    Loads the file at ``self.path`` into ``self.htpass`` as a
    passlib.apache.HtpasswdFile.

    Raises
    ------
    ImportError
        when passlib package not installed
    BrainError
        when the provided htpass file path does not exist
    """
    if not passlib:
        raise ImportError('passlib package not installed.  Cannot use Htpass.')

    htpath = self.path
    if htpath.is_file():
        self.htpass = HtpasswdFile(htpath)
        return

    raise BrainError(f'No .htpasswd file found at {htpath}!')
def set_user(self, user: str = 'sdss', password: str = None) -> None:
    """ Set a new global user

    Sets a new global `~sdss_brain.auth.user.User`.  By default with be set
    to the generic "sdss" user.  The "default_username" and
    "default_userpass" keywords in the custom YAML configuration file, when
    set, take precedence over the ``user`` and ``password`` inputs.

    Parameters
    ----------
    user : str, optional
        The username to use, by default 'sdss'
    password : str, optional
        The password to use to validate the user, by default None.

    Raises
    ------
    BrainError
        when a user cannot be validated with the given password
    """
    default_user = self._custom_config.get('default_username', None)
    default_pass = self._custom_config.get('default_userpass', None)

    # NOTE(review): config defaults win over explicitly passed arguments
    # here - confirm that this precedence is intended
    user = default_user or user
    password = default_pass or password

    # build the status text first; the original literal lacked the f-prefix
    # and so logged the raw "{...}" expression instead of the status
    pass_status = "No password specified." if not password else "Password specified."
    log.debug(f'Setting user {user}. {pass_status}')

    self.user = User(user)
    if not self.user.validated and user and password:
        self.user.validate_user(password)
        if not self.user.validated:
            raise BrainError(f'Could not validate default user {user}!')
        else:
            log.debug(f'Validated user {user}')

    if not self.user.validated:
        log.warning(
            f'User {user} is not validated.  Check your netrc credentials '
            'or validate your user with config.set_user(username, password)'
        )
def _create_token_auth_header(self) -> dict: """ Create a new request authorization header Creates a new http request header containing valid Bearer token authorization. Returns ------- dict A new http request header to be added to the outgoing request Raises ------ BrainError when no API profile has been set ValueError when a token cannot be retrieved """ if not self.api: raise BrainError( "No API profile set. Cannot created token auth header.") if self.api.auth_type != 'token': return {} token = self.api.token if not token: log.info('No token found. Attempting to retreive one.') token = self.get_token() if not token: raise ValueError( f'No token retrieved for API {self.api.name}. Check for a valid ' 'user, try self.get_token again, and save your token!') else: log.info( "This token is temporary. To permanently set one, run self.api.get_token " "and save it in your config file or as an environment variable." ) return {'Authorization': f'Bearer {token}'}
def _check_response(self, resp: Type[httpx.Response]) -> None: """ Checks the returned httpx response Checks the httpx response, raises exception for any problem status code, and attempts to extract the response content. Parameters ---------- resp : Type[httpx.Response] The returned httpx response instance Raises ------ BrainError when the response is not ok """ resp.raise_for_status() if resp.is_error: raise BrainError('There was an error in the response!') self.response = resp self._return_data()
def _validate_request(self, url: str, method: str) -> None: """ Validates some inputs to the request wrapper Validates the wrapper request method for a proper url and input method type. Parameters ---------- url : str The request url method : str The type of http request Raises ------ ValueError when an invalid method is passed ValueError when no url is set """ if url: self.set_url(url) if method not in ['get', 'post', 'stream', 'head']: raise ValueError( 'http request method type can only be "get", "post", ' '"head", or "stream".') if not self.url: raise ValueError( 'No url set. Cannot make a request. Please specify a ' 'url or route segment.') # look for any bracketed named arguments in the url parts = re.findall(r'{(.*?)}', self.url) if parts: raise BrainError( f'Request url contains bracket arguments: "{", ".join(parts)}". ' 'Cannot send request until these are properly replaced.')
def get_full_path(self, url: str = None, force_file: bool = None) -> str:
    """ Returns the full path of the file in the tree.

    Parameters
    ----------
    url : str
        Used as a flag: when truthy, the path is built with
        ``self.access.url`` instead of ``self.access.full``
    force_file : bool
        If True, explicitly returns any set filename attribute instead
        of constructing a path from keyword arguments.

    Returns
    -------
    fullpath : str
        The full path as built by sdss_access, or None when sdss_access
        failed with anything other than a TypeError (a warning is issued)

    Raises
    ------
    BrainError
        when sdss_access raises a TypeError for the path parameters
    """
    if force_file:
        return self.filename

    log.debug(
        f'getting full path for {self.path_name} and params {self.path_params}'
    )

    msg = 'sdss_access was not able to retrieve the full path of the file.'
    try:
        resolver = self.access.url if url else self.access.full
        return resolver(self.path_name, **self.path_params)
    except TypeError as ee:
        detail = msg + 'Error: {0}'.format(str(ee))
        warnings.warn(detail, BrainUserWarning)
        raise BrainError(f'Bad input type for sdss_access: {ee}') from ee
    except Exception as ee:
        # any other failure is deliberately downgraded to a warning
        detail = msg + 'Error: {0}'.format(str(ee))
        warnings.warn(detail, BrainUserWarning)

    return None
async def request(self, url: str = None, data: dict = None, method: str = 'get', files: dict = None, content: bytes = None, **kwargs):
    """ Submit a http request with httpx

    This is a convenience method that wraps `httpx.AsyncClient.request`.
    It provides support for simple "get", "post", or "stream" requests.
    For more custom, or complete, control over sending requests, use the
    `client` attribute, and see
    `httpx AsyncClient <https://www.python-httpx.org/async/>`_ and
    `httpx docs <https://www.python-httpx.org/quickstart/>`_. for more.

    The client is closed after every call, whether or not the request
    succeeded.

    Parameters
    ----------
    url : str, optional
        A url or route segment to send a request to, by default None
    data : dict, optional
        Any data passed along in the request, by default None
    method : str, optional
        The type of http request method, by default 'get'
    files : dict, optional
        Input for multi-part file uploads, by default None
    content : bytes, optional
        Input for binary content, by default None
    kwargs : Any, optional
        Any other httpx request keyword arguments

    Raises
    ------
    BrainError
        when there is an error sending the request
    """
    # validate the input
    self._validate_request(url, method)

    # add any token auth to headers
    headers = None
    if self.api and self.api.auth_type == 'token':
        headers = self._create_token_auth_header()

    # prepare the data with the release
    payload = self.prepare_data(data)

    try:
        # try to send the request
        if method == 'stream':
            resp = await self._stream_request()
        else:
            # "get" sends the payload as query params; everything else
            # sends it as both form data and json
            is_get = method == 'get'
            resp = await self.client.request(
                method, self.url,
                params=payload if is_get else None,
                data=None if is_get else payload,
                json=None if is_get else payload,
                files=files, content=content, headers=headers, **kwargs)
    except httpx.RequestError as exc:
        raise BrainError(
            f'An error occurred requesting {exc.request.url!r}') from exc
    else:
        # check the response and set the data attribute
        self._check_response(resp)
    finally:
        await self.client.aclose()
def get_token(self, user: str) -> str:
    """ Request and receive a valid API auth token

    Requests an auth token for the specified user.  This uses found netrc
    authentication to attempt to request and retrieve a valid token.  The
    token should be saved in an "XXX_API_TOKEN" environment variable or in
    the custom sdss_brain.yml configuration file as "xxx_api_token", where
    "XXX" is the API profile name.

    An already set ``self.token`` is returned directly without any request.

    Parameters
    ----------
    user : str
        The name of the SDSS user

    Returns
    -------
    str
        A valid API auth token, or None when the API auth type is not
        "token"

    Raises
    ------
    BrainError
        when the user is not netrc validated
    BrainError
        when a token cannot be extracted from the http response
    """
    if self.token:
        return self.token

    # a profile without an "auth" entry (or without a "type") simply has no
    # token auth; previously this raised a TypeError/KeyError on the lookup
    auth = self.info.get('auth', None) or {}
    if auth.get('type') != 'token':
        log.info(
            f'Auth type for API {self.name} is not "token".  No token needed.'
        )
        return None

    if isinstance(user, str):
        user = User(user)

    if not user.validated and not user.is_netrc_valid:
        raise BrainError(
            f'User {user.name} is not netrc validated!  Cannot access credentials.'
        )

    username, password = user.netrc.read_netrc('api.sdss.org')
    token_url = self.construct_token_url()
    data = send_post_request(token_url, data={
        'username': username,
        'password': password
    })

    # take the value of the first key present in the response
    token_keys = ('token', 'access_token', 'user_token', 'sdss_token')
    token = next((data[key] for key in token_keys if key in data), None)

    if not token:
        raise BrainError(
            'Token request successful but could not extract token '
            'from response data. Check the returned json response '
            'for prope key name')

    tok_name = f'{self.name.upper()}_API_TOKEN'
    log.info(
        f'Save this token as either a "{tok_name}" environment variable in your '
        f'.bashrc or as "{tok_name.lower()}" in your custom sdss_brain.yml config file.'
    )
    return token
def release(self, value: str):
    """ Reject any attempt to change the release after instantiation.

    Parameters
    ----------
    value : str
        The requested release; never applied

    Raises
    ------
    BrainError
        always
    """
    message = 'the release cannot be changed once the object has been instantiated.'
    raise BrainError(message)