def update_single_config_from_path_and_value(self, path, value):
    """Update a single config parameter, addressed by a path, with ``value``.

    ``path`` is a list of strings.  The first element selects the config
    section (``"server"`` or ``"dataset"``); the remaining elements are
    joined with ``__`` to form the parameter name.  For example,
    ``["server", "app", "port"]`` calls ``update_server_config(app__port=value)``.

    Raises:
        ConfigurationError: if ``path`` is not a list of strings, does not
            start with ``"server"`` or ``"dataset"``, or the section's
            update method rejects the parameter.
    """
    # Any change invalidates a previously completed configuration.  The flag
    # is spelled ``is_completed`` wherever it is read (e.g. ``check_config``);
    # the former ``is_complete`` spelling only created an unused attribute.
    self.is_completed = False

    if not isinstance(path, list) or not all(isinstance(part, str) for part in path):
        raise ConfigurationError(
            f"path must be a list of strings, got '{str(path)}'")

    if len(path) < 1 or path[0] not in ("server", "dataset"):
        raise ConfigurationError(
            "path must start with 'server', or 'dataset'")

    attr = "__".join(path[1:])
    if path[0] == "server":
        updater = self.update_server_config
    else:
        updater = self.update_dataset_config

    try:
        updater(**{attr: value})
    except ConfigurationError as e:
        # Re-raise with the full path, keeping the underlying cause attached.
        raise ConfigurationError(
            f"unknown config parameter at path: '{str(path)}'") from e
def __init__(self, server_config):
    """Initialise the oauth authentication type from ``server_config``.

    Copies the ``authentication__params_oauth__*`` settings onto this
    object, validates the cookie parameters, and downloads the JSON Web Key
    Set (JWKS) from ``<api_base_url>/.well-known/jwks.json``.

    Raises:
        ConfigurationError: if ``missingimport`` is non-empty, if
            ``_validate_cookie_params`` rejects the cookie settings, or if
            the JWKS cannot be fetched or decoded.
    """
    super().__init__()
    if missingimport:
        raise ConfigurationError(
            f"oauth requires these modules: {', '.join(missingimport)}")

    self.algorithms = ["RS256"]
    self.api_base_url = server_config.authentication__params_oauth__api_base_url
    self.client_id = server_config.authentication__params_oauth__client_id
    self.client_secret = server_config.authentication__params_oauth__client_secret
    self.callback_base_url = server_config.authentication__params_oauth__callback_base_url
    self.session_cookie = server_config.authentication__params_oauth__session_cookie
    self.cookie_params = server_config.authentication__params_oauth__cookie

    self._validate_cookie_params()

    # set the audience
    self.audience = self.client_id

    # Load the JWKS (JSON Web Key Set): the set of public keys used to verify
    # any JSON Web Token (JWT) issued by the authorization server and signed
    # using RS256.  The URL is built outside the ``try`` so it is always
    # available to the error message.
    jwksloc = f"{self.api_base_url}/.well-known/jwks.json"
    try:
        # NOTE(review): no timeout is passed, so an unresponsive server can
        # block start-up indefinitely -- consider adding one.
        response = requests.get(jwksloc)
        self.jwks = response.json()
    except Exception as e:
        # Keep the original failure attached as the cause for diagnosis.
        raise ConfigurationError(
            f"error in oauth, api_url_base: {self.api_base_url}, cannot access {jwksloc}"
        ) from e
def handle_environment(self, context):
    """Apply the environment-variable overrides listed in ``self.environment``.

    Each entry is a mapping with a ``name`` (the environment variable), a
    ``path`` (the config parameter to set) and an optional boolean
    ``required``.  When the variable is set, its converted value is written
    to the config parameter at ``path``; when it is unset, the entry is
    skipped unless it is required.

    Raises:
        ConfigurationError: if an entry lacks ``name`` or ``path``, if
            ``required`` is not a bool, or if a required variable is unset.
    """
    self.validate_correct_type_of_configuration_attribute(
        "environment", list)

    for entry in self.environment:
        env_name = entry.get("name")
        if env_name is None:
            raise ConfigurationError("environment: 'name' is missing")

        is_required = entry.get("required", False)
        if not isinstance(is_required, bool):
            raise ConfigurationError(
                "environment: 'required' must be a bool")

        config_path = entry.get("path")
        if config_path is None:
            raise ConfigurationError("environment: 'path' is missing")

        raw_value = os.environ.get(env_name)
        if raw_value is not None:
            # The variable is set: convert the string and apply it.
            converted = convert_string_to_value(raw_value)
            self.app_config.update_single_config_from_path_and_value(
                config_path, converted)
            continue

        # The variable is unset: only an error when the entry demands it.
        if is_required:
            raise ConfigurationError(
                f"required environment variable '{env_name}' not set")
def update_from_config_file(self, config_file):
    """Load a YAML config file and apply its sections to this configuration.

    Recognised top-level keys are ``server``, ``dataset``,
    ``per_dataset_config`` and ``external``.  For each entry in
    ``per_dataset_config`` a dataroot config is first created from the
    file's ``dataset`` section (if any) and then updated with the
    per-dataset values.

    Raises:
        ConfigurationError: if the file cannot be read, is not valid YAML,
            or does not contain a mapping at the top level.
    """
    try:
        with open(config_file) as yml_file:
            config = yaml.safe_load(yml_file)
    except yaml.YAMLError as e:
        raise ConfigurationError(
            f"The specified config file contained an error: {e}") from e
    except OSError as e:
        raise ConfigurationError(
            f"Issue retrieving the specified config file: {e}") from e

    # An empty file parses to None; treat it as a file with no overrides
    # instead of failing on ``None.get``.
    if config is None:
        config = {}
    if not isinstance(config, dict):
        raise ConfigurationError(
            "The specified config file must contain a mapping at the top level")

    if config.get("server"):
        self.server_config.update_from_config(config["server"], "server")

    dataset_section = config.get("dataset")
    if dataset_section:
        self.default_dataset_config.update_from_config(
            dataset_section, "dataset")

    # ``or {}`` also covers a key that is present but empty (null).
    per_dataset_config = config.get("per_dataset_config") or {}
    for key, dataroot_config in per_dataset_config.items():
        # first create and initialize the dataroot with the default config;
        # the file may legitimately omit the "dataset" section.
        self.add_dataroot_config(key, **(dataset_section or {}))
        # then apply the per dataset configuration
        self.dataroot_config[key].update_from_config(
            dataroot_config, f"per_dataset_config__{key}")

    if config.get("external"):
        self.external_config.update_from_config(config["external"], "external")

    # The flag read by ``check_config`` is ``is_completed``; the previous
    # ``is_complete`` spelling never reset it.
    self.is_completed = False
def check_config(self):
    """Verify the configuration is completed and every attribute was checked.

    Raises:
        ConfigurationError: if ``is_completed`` is false, or if any
            attribute in the mapping of ``default_config`` is not marked in
            ``attr_checked``.
    """
    if not self.is_completed:
        raise ConfigurationError(
            "The configuration has not been completed")

    flattened = self.__mapping(self.default_config)
    for attr_name in flattened:
        if self.attr_checked[attr_name]:
            continue
        raise ConfigurationError(
            f"The attr '{attr_name}' has not been checked")
def handle_app(self, context):
    """Validate the ``app__*`` settings and derive their runtime values.

    Type-checks each setting, resolves the port (verifying a configured
    port is free, otherwise searching from ``DEFAULT_SERVER_PORT``), applies
    the debug/verbose side effects, lets ``CXG_SECRET_KEY`` override the
    flask secret key, and validates the shape of the CSP directives.

    Raises:
        ConfigurationError: on a type mismatch, a port already in use, or
            malformed CSP directives.
    """
    expected_types = (
        ("app__verbose", bool),
        ("app__debug", bool),
        ("app__host", str),
        ("app__port", (type(None), int)),
        ("app__open_browser", bool),
        ("app__force_https", bool),
        ("app__flask_secret_key", (type(None), str)),
        ("app__generate_cache_control_headers", bool),
        ("app__server_timing_headers", bool),
        ("app__csp_directives", (type(None), dict)),
    )
    for attr_name, attr_type in expected_types:
        self.check_attr(attr_name, attr_type)

    if not self.app__port:
        self.app__port = find_available_port(self.app__host,
                                             DEFAULT_SERVER_PORT)
    elif not is_port_available(self.app__host, self.app__port):
        raise ConfigurationError(
            f"The port selected {self.app__port} is in use, please configure an open port."
        )

    if self.app__debug:
        context["messagefn"](
            "in debug mode, setting verbose=True and open_browser=False")
        self.app__verbose = True
        self.app__open_browser = False
    else:
        warnings.formatwarning = custom_format_warning

    if not self.app__verbose:
        sys.tracebacklimit = 0

    # secret key:
    #   first, from CXG_SECRET_KEY environment variable
    #   second, from config file
    self.app__flask_secret_key = os.environ.get("CXG_SECRET_KEY",
                                                self.app__flask_secret_key)

    # CSP Directives are a dict of string: list(string) or string: string
    directives = self.app__csp_directives
    if directives is None:
        return

    bad_value_message = "CSP directive value must be a string or list of strings."
    for directive, policies in directives.items():
        if not isinstance(directive, str):
            raise ConfigurationError(
                "CSP directive names must be a string.")
        # Treat a lone value as a one-element list so one check covers both.
        candidates = policies if isinstance(policies, list) else [policies]
        if not all(isinstance(policy, str) for policy in candidates):
            raise ConfigurationError(bad_value_message)
def handle_single_dataset(self, context):
    """Validate the ``single_dataset__*`` settings and pre-validate the data.

    Returns early when only a dataroot is configured.  Otherwise creates the
    matrix data cache manager if needed, runs the loader's pre-load
    validation, reports the load through ``context["messagefn"]`` and checks
    that ``single_dataset__about`` (if given) is an absolute URL.

    Raises:
        ConfigurationError: on a type mismatch, when neither or both of
            datapath/dataroot are given, when the dataset cannot be
            accessed, or when the about URL is not absolute.
    """
    optional_str = (str, type(None))
    for attr_name in (
            "single_dataset__datapath",
            "single_dataset__title",
            "single_dataset__about",
            "single_dataset__obs_names",
            "single_dataset__var_names",
    ):
        self.check_attr(attr_name, optional_str)

    datapath = self.single_dataset__datapath
    dataroot = self.multi_dataset__dataroot
    if datapath is None:
        if dataroot is None:
            # TODO:  change the error message once dataroot is fully supported
            raise ConfigurationError("missing datapath")
        return
    if dataroot is not None:
        raise ConfigurationError(
            "must supply only one of datapath or dataroot")

    # create the matrix data cache manager:
    if self.matrix_data_cache_manager is None:
        self.matrix_data_cache_manager = MatrixDataCacheManager(
            max_cached=1, timelimit_s=None)

    # preload this data set
    loader = MatrixDataLoader(datapath, app_config=self.app_config)
    try:
        loader.pre_load_validation()
    except DatasetAccessError as e:
        raise ConfigurationError(str(e))

    size = loader.file_size()
    short_name = basename(datapath)
    if size > BIG_FILE_SIZE_THRESHOLD:
        context["messagefn"](
            f"Loading data from {short_name}, this may take a while...")
    else:
        context["messagefn"](f"Loading data from {short_name}.")

    about = self.single_dataset__about
    if about:
        # An absolute URL has both a scheme and a network location.
        try:
            parts = urlparse(about)
            is_absolute = bool(parts.scheme and parts.netloc)
        except ValueError:
            is_absolute = False
        if not is_absolute:
            raise ConfigurationError(
                "Must provide an absolute URL for --about. (Example format: http://example.com)"
            )
def set_tiledb_context(context_params):
    """Set the tiledb context from ``context_params``.

    This should be set before any instances of CxgAdaptor are created.
    A repeated call is tolerated only when the already-initialized global
    context has the same configuration as ``CxgAdaptor.tiledb_ctx``.

    Raises:
        ConfigurationError: if the parameters are invalid, or if the global
            context is already initialized with a different configuration.
    """
    try:
        CxgAdaptor.tiledb_ctx = tiledb.Ctx(context_params)
        tiledb.default_ctx(context_params)
    except tiledb.libtiledb.TileDBError as e:
        if e.message != "Global context already initialized!":
            raise ConfigurationError(f"Invalid tiledb context: {str(e)}")

        # Already initialized: acceptable only if nothing actually changes.
        active_config = tiledb.default_ctx().config().dict()
        requested_config = CxgAdaptor.tiledb_ctx.config().dict()
        if active_config != requested_config:
            raise ConfigurationError("Cannot change tiledb configuration once it is set")
def handle_local_file_csv_annotations(self, context):
    """Configure local CSV-file user annotations and validate their inputs.

    Rejects a data directory combined with explicit files, checks that the
    annotation and gene-set files use the ``.csv`` extension (or none),
    creates the directory if it does not exist, and builds
    ``self.user_annotations``.  When a single dataset is configured, the
    specified files are read immediately so that errors surface early.

    Raises:
        ConfigurationError: on conflicting options, a non-csv extension, a
            directory that cannot be created, or an unreadable gene-sets
            file.
    """
    dirname = self.user_annotations__local_file_csv__directory
    filename = self.user_annotations__local_file_csv__file
    genesets_filename = self.user_annotations__local_file_csv__gene_sets_file

    any_file_given = not (filename is None and genesets_filename is None)
    if dirname is not None and any_file_given:
        raise ConfigurationError(
            "'user-generated-data-dir' may not be used with 'annotations-file' or 'gene-sets-file'."
        )

    if filename is not None:
        _, extension = splitext(filename)
        if extension not in ("", ".csv"):
            raise ConfigurationError(
                f"annotation file type must be .csv: {filename}")

    if genesets_filename is not None:
        _, extension = splitext(genesets_filename)
        if extension not in ("", ".csv"):
            raise ConfigurationError(
                f"genesets file type must be .csv: {genesets_filename}")

    if dirname is not None and not isdir(dirname):
        try:
            os.mkdir(dirname)
        except OSError:
            raise ConfigurationError(
                "Unable to create directory specified by --user-generated-data-dir"
            )

    self.user_annotations = AnnotationsLocalFile(
        {
            "user-annotations": self.user_annotations__enable,
            "genesets-save": not self.user_annotations__gene_sets__readonly,
        },
        dirname,
        filename,
        genesets_filename,
    )

    # if the user has specified a fixed label file, go ahead and validate it
    # so that we can remove errors early in the process.
    if not self.app_config.server_config.single_dataset__datapath:
        return

    data_adaptor = self.get_data_adaptor()
    if self.user_annotations__local_file_csv__file:
        self.user_annotations.read_labels(data_adaptor)
    if self.user_annotations__local_file_csv__gene_sets_file:
        try:
            self.user_annotations.read_gene_sets(data_adaptor, context)
        except (ValueError, AnnotationsError, KeyError) as e:
            raise ConfigurationError(
                f"Unable to read genesets CSV file: {str(e)}") from e
def handle_data_source(self, context):
    """Check that exactly one of datapath and dataroot is configured.

    Raises:
        ConfigurationError: on a type mismatch, when neither a datapath nor
            a dataroot is given, or when both are given.
    """
    self.check_attr("single_dataset__datapath", (str, type(None)))
    self.check_attr("multi_dataset__dataroot", (type(None), dict, str))

    has_datapath = self.single_dataset__datapath is not None
    has_dataroot = self.multi_dataset__dataroot is not None

    if not (has_datapath or has_dataroot):
        # TODO:  change the error message once dataroot is fully supported
        raise ConfigurationError("missing datapath")
    if has_datapath and has_dataroot:
        raise ConfigurationError(
            "must supply only one of datapath or dataroot")
def update(self, **kw):
    """Set config parameters from keyword arguments.

    Every updated parameter is marked as unchecked in ``attr_checked`` and
    the configuration is flagged as not completed.

    Raises:
        ConfigurationError: if a keyword does not name an existing
            attribute, or if setting it raises ``KeyError``.
    """
    for param, new_value in kw.items():
        if not hasattr(self, param):
            raise ConfigurationError(f"unknown config parameter {param}.")
        try:
            setattr(self, param, new_value)
        except KeyError:
            raise ConfigurationError(
                f"Unable to set config parameter {param}.")
        else:
            # The new value has not been validated yet.
            self.attr_checked[param] = False

    self.is_completed = False
def update_from_config(self, config, prefix):
    """Apply a nested config section to this object's attributes.

    ``config`` is flattened with ``create_mapping``; each resulting
    attribute is set and marked as unchecked.  ``prefix`` is only used to
    qualify attribute names in error messages.

    Raises:
        ConfigurationError: if a flattened attribute does not exist on this
            object, or if setting it raises ``KeyError``.
    """
    flattened = self.create_mapping(config)
    for attr, (_, new_value) in flattened.items():
        if not hasattr(self, attr):
            raise ConfigurationError(
                f"Unknown key from config file: {prefix}__{attr}")
        try:
            setattr(self, attr, new_value)
        except KeyError:
            raise ConfigurationError(
                f"Unable to set config attribute: {prefix}__{attr}")
        else:
            # The new value has not been validated yet.
            self.attr_checked[attr] = False
def handle_app(self, context):
    """Validate the ``app__*`` settings and resolve the server port.

    A configured port must be free.  Otherwise a free port is searched for,
    starting at ``CXG_SERVER_PORT`` from the environment or, if unset,
    ``DEFAULT_SERVER_PORT``.  Debug mode forces verbose output and disables
    opening the browser; non-verbose mode suppresses tracebacks.

    Raises:
        ConfigurationError: on a type mismatch, a port in use, or an
            invalid port value.
    """
    expected_types = (
        ("app__verbose", bool),
        ("app__debug", bool),
        ("app__host", str),
        ("app__port", (type(None), int)),
        ("app__open_browser", bool),
        ("app__force_https", bool),
        ("app__flask_secret_key", str),
        ("app__generate_cache_control_headers", bool),
    )
    for attr_name, attr_type in expected_types:
        self.validate_correct_type_of_configuration_attribute(
            attr_name, attr_type)

    if self.app__port:
        try:
            port_is_free = is_port_available(self.app__host, self.app__port)
        except OverflowError:
            raise ConfigurationError(f"Invalid port: {self.app__port}")
        if not port_is_free:
            raise ConfigurationError(
                f"The port selected {self.app__port} is in use, please configure an open port."
            )
    else:
        try:
            default_server_port = int(
                os.environ.get("CXG_SERVER_PORT", DEFAULT_SERVER_PORT))
        except ValueError:
            raise ConfigurationError(
                "Invalid port from environment variable CXG_SERVER_PORT: " +
                os.environ.get("CXG_SERVER_PORT"))

        try:
            self.app__port = find_available_port(self.app__host,
                                                 default_server_port)
        except OverflowError:
            raise ConfigurationError(
                f"Invalid port: {default_server_port}")

    if self.app__debug:
        context["messagefn"](
            "in debug mode, setting verbose=True and open_browser=False")
        self.app__verbose = True
        self.app__open_browser = False
    else:
        warnings.formatwarning = custom_format_warning

    if not self.app__verbose:
        sys.tracebacklimit = 0
def handle_data_source(self):
    """Check that either a datapath or a dataroot is configured, not both.

    Raises:
        ConfigurationError: on a type mismatch, when both settings are
            supplied, or when both are ``None``.
    """
    validate = self.validate_correct_type_of_configuration_attribute
    validate("single_dataset__datapath", (str, type(None)))
    validate("multi_dataset__dataroot", (type(None), dict, str))

    datapath = self.single_dataset__datapath
    dataroot = self.multi_dataset__dataroot

    if datapath and dataroot:
        raise ConfigurationError(
            "You must supply either a datapath (for single datasets) or a dataroot (for multidatasets). Not both"
        )
    if datapath is None and dataroot is None:
        raise ConfigurationError(
            "You must specify a datapath for a single dataset or a dataroot for multidatasets"
        )
def update(self, **kw):
    """Set config parameters from keyword arguments.

    Values whose type is exactly ``tuple`` are stored as lists.  Every
    updated parameter is marked as unchecked in ``attr_checked``.

    Raises:
        ConfigurationError: if a keyword does not name an existing
            attribute, or if setting it raises ``KeyError``.
    """
    for param, new_value in kw.items():
        if not hasattr(self, param):
            raise ConfigurationError(f"unknown config parameter {param}.")

        # convert tuple values to list values
        stored = list(new_value) if type(new_value) is tuple else new_value
        try:
            setattr(self, param, stored)
        except KeyError:
            raise ConfigurationError(
                f"Unable to set config parameter {param}.")
        else:
            # The new value has not been validated yet.
            self.attr_checked[param] = False
def handle_X_approximate_distribution(self):
    """Validate that ``X_approximate_distribution`` is a known string value.

    Raises:
        ConfigurationError: on a type mismatch, or if the value is not one
            of ``auto``, ``normal`` or ``count``.
    """
    self.validate_correct_type_of_configuration_attribute(
        "X_approximate_distribution", str)

    allowed_values = ("auto", "normal", "count")
    if self.X_approximate_distribution in allowed_values:
        return
    raise ConfigurationError(
        "X_approximate_distribution has unknown value -- must be 'auto', 'normal' or 'count'."
    )
def handle_authentication(self):
    """Validate the authentication settings and create ``self.auth``.

    The oauth url, client id and client secret must be strings when the
    authentication type is ``oauth`` and may otherwise be ``None``.  The
    cookie settings may be ``None`` only when a session cookie is used.

    Raises:
        ConfigurationError: on a type mismatch, or if ``AuthTypeFactory``
            does not know the authentication type.
    """
    validate = self.validate_correct_type_of_configuration_attribute
    optional_str = (type(None), str)
    optional_dict = (type(None), dict)

    validate("authentication__type", optional_str)

    # oauth
    ptypes = str if self.authentication__type == "oauth" else optional_str
    validate("authentication__params_oauth__oauth_api_base_url", ptypes)
    validate("authentication__params_oauth__client_id", ptypes)
    validate("authentication__params_oauth__client_secret", ptypes)
    validate("authentication__params_oauth__jwt_decode_options", optional_dict)
    validate("authentication__params_oauth__session_cookie", bool)

    # Explicit cookie settings are mandatory unless a session cookie is used.
    if self.authentication__params_oauth__session_cookie:
        cookie_types = optional_dict
    else:
        cookie_types = dict
    validate("authentication__params_oauth__cookie", cookie_types)

    self.auth = AuthTypeFactory.create(self.authentication__type, self)
    if self.auth is None:
        raise ConfigurationError(
            f"Unknown authentication type: {self.authentication__type}")
def import_plugins(plugin_module):
    """Load optional plugin modules found under ``plugin_module``.

    If you would like to customize cellxgene, you can add submodules to
    server.common.plugins before running the app.  This function imports
    each submodule of the package named by ``plugin_module``, which runs the
    code in each.  If the package does not exist, no plugins are loaded and
    initialization continues as normal.

    Args:
        plugin_module: dotted name of the package that contains the plugins.

    Returns:
        The list of imported plugin modules (empty when the package is not
        found).

    Raises:
        ConfigurationError: if importing any plugin submodule fails; the
            original exception is attached as the cause.
    """
    try:
        pkg = importlib.import_module(plugin_module)
    except ModuleNotFoundError as e:
        # This exception occurs when the plugin_module does not exist (not an error).
        logging.debug("No plugins found in module: %s: %s", plugin_module, e)
        return []

    loaded_modules = []
    for _, name, _ in pkgutil.walk_packages(pkg.__path__):
        full_name = f"{plugin_module}.{name}"
        try:
            module = importlib.import_module(full_name)
        except Exception as e:
            raise ConfigurationError(
                f"Unexpected error while importing plugin: {full_name}: {str(e)}"
            ) from e
        loaded_modules.append(module)

    return loaded_modules
def check_attr(self, attrname, vtype):
    """Check that an attribute has exactly the expected type and mark it checked.

    ``vtype`` is either a single type or a list/tuple of acceptable types.
    The comparison is on the exact type, not ``isinstance``, so a subclass
    instance (for example a ``bool`` where ``int`` is expected) is rejected.

    Raises:
        ConfigurationError: if the attribute's type is not accepted.
    """
    val = getattr(self, attrname)
    actual_type = type(val)

    if type(vtype) in (list, tuple):
        if actual_type not in vtype:
            tnames = ",".join([candidate.__name__ for candidate in vtype])
            raise ConfigurationError(
                f"Invalid type for attribute: {attrname}, expected types ({tnames}), got {actual_type.__name__}"
            )
    elif actual_type != vtype:
        raise ConfigurationError(
            f"Invalid type for attribute: {attrname}, "
            f"expected type {vtype.__name__}, got {actual_type.__name__}"
        )

    self.attr_checked[attrname] = True
def handle_multi_dataset(self, context):
    """Validate the ``multi_dataset__*`` settings and set up the data cache.

    Does nothing beyond type checking when no dataroot is configured.
    Otherwise verifies every allowed matrix type and creates the matrix data
    cache manager if one does not exist yet.

    Raises:
        ConfigurationError: on a type mismatch or an invalid matrix type.
    """
    expected_types = (
        ("multi_dataset__dataroot", (type(None), str)),
        ("multi_dataset__index", (type(None), bool, str)),
        ("multi_dataset__allowed_matrix_types", (tuple, list)),
        ("multi_dataset__matrix_cache__max_datasets", int),
        ("multi_dataset__matrix_cache__timelimit_s", (type(None), int, float)),
    )
    for attr_name, attr_type in expected_types:
        self.__check_attr(attr_name, attr_type)

    if self.multi_dataset__dataroot is None:
        return

    # error checking
    for matrix_type in self.multi_dataset__allowed_matrix_types:
        try:
            MatrixDataType(matrix_type)
        except ValueError:
            raise ConfigurationError(
                f'Invalid matrix type in "allowed_matrix_types": {matrix_type}')

    # create the matrix data cache manager:
    if self.matrix_data_cache_manager is not None:
        return
    self.matrix_data_cache_manager = MatrixDataCacheManager(
        max_cached=self.multi_dataset__matrix_cache__max_datasets,
        timelimit_s=self.multi_dataset__matrix_cache__timelimit_s,
    )
def handle_authentication(self, context):
    """Validate the authentication settings and create ``self.auth``.

    The oauth client secret is taken from the ``CXG_OAUTH_CLIENT_SECRET``
    environment variable when it is set, otherwise from the config value.
    The url, client id and client secret must be strings when the
    authentication type is ``oauth`` and may otherwise be ``None``.  The
    cookie settings may be ``None`` only when a session cookie is used.

    Raises:
        ConfigurationError: on a type mismatch, or if ``AuthTypeFactory``
            does not know the authentication type.
    """
    self.check_attr("authentication__type", (type(None), str))

    # secret key: first, from CXG_OAUTH_CLIENT_SECRET environment variable
    # second, from config file.
    # The override used to be stored on a misspelled attribute
    # (``authentication__params__oauth__client_secret``) and so was never
    # used.  It is applied before the type check so that a secret supplied
    # only through the environment satisfies the oauth validation below.
    self.authentication__params_oauth__client_secret = os.environ.get(
        "CXG_OAUTH_CLIENT_SECRET",
        self.authentication__params_oauth__client_secret)

    # oauth
    ptypes = str if self.authentication__type == "oauth" else (type(None), str)
    self.check_attr("authentication__params_oauth__api_base_url", ptypes)
    self.check_attr("authentication__params_oauth__client_id", ptypes)
    self.check_attr("authentication__params_oauth__client_secret", ptypes)
    self.check_attr("authentication__params_oauth__callback_base_url",
                    (type(None), str))
    self.check_attr("authentication__params_oauth__session_cookie", bool)

    # Explicit cookie settings are mandatory unless a session cookie is used.
    if self.authentication__params_oauth__session_cookie:
        self.check_attr("authentication__params_oauth__cookie",
                        (type(None), dict))
    else:
        self.check_attr("authentication__params_oauth__cookie", dict)

    self.auth = AuthTypeFactory.create(self.authentication__type, self)
    if self.auth is None:
        raise ConfigurationError(
            f"Unknown authentication type: {self.authentication__type}")
def handle_embeddings(self, context):
    """Validate the embeddings settings.

    When a single dataset is configured and re-embedding is enabled, the
    dataset must be an H5AD file and must not be run in backed mode.

    Raises:
        ConfigurationError: on a type mismatch, or if re-embedding is
            enabled for a non-H5AD dataset or in backed mode.
    """
    self.check_attr("embeddings__names", list)
    self.check_attr("embeddings__enable_reembedding", bool)

    datapath = self.app_config.server_config.single_dataset__datapath
    if not datapath or not self.embeddings__enable_reembedding:
        return

    # Build the loader from the same datapath the guard above tested.  The
    # previous code read ``self.single_dataset__datapath``, which is not the
    # value that was checked.
    matrix_data_loader = MatrixDataLoader(datapath, app_config=self.app_config)
    if matrix_data_loader.matrix_data_type() != MatrixDataType.H5AD:
        raise ConfigurationError(
            "'enable-reembedding is only supported with H5AD files."
        )

    # NOTE(review): ``adaptor__anndata_adaptor__backed`` is read from self
    # here, while the datapath comes from the server config -- confirm this
    # attribute is defined on this config object.
    if self.adaptor__anndata_adaptor__backed:
        raise ConfigurationError(
            "enable-reembedding is not supported when run in --backed mode."
        )
def check_config(self):
    """Verify all the attributes in the config have been type checked.

    Delegates to the server, dataset and external configs, in that order.

    Raises:
        ConfigurationError: if the configuration has not been completed
            (sub-config checks may raise as well).
    """
    if not self.is_completed:
        raise ConfigurationError(
            "The configuration has not been completed")

    for section_name in ("server_config", "dataset_config", "external_config"):
        getattr(self, section_name).check_config()
def check_config(self):
    """Verify all the attributes have been checked.

    Delegates to the server config, the default dataset config and then
    every per-dataroot dataset config.

    Raises:
        ConfigurationError: if the configuration has not been completed
            (sub-config checks may raise as well).
    """
    if not self.is_completed:
        raise ConfigurationError(
            "The configuration has not been completed")

    self.server_config.check_config()
    self.default_dataset_config.check_config()
    for per_dataroot_config in self.dataroot_config.values():
        per_dataroot_config.check_config()
def update_from_config_file(self, config_file):
    """Load a YAML config file and apply its values to this configuration.

    The loaded document is flattened into attribute names; each attribute
    is set and marked as unchecked, and the configuration is flagged as not
    completed.

    Raises:
        ConfigurationError: if a key in the file does not correspond to an
            existing attribute, or if setting it raises ``KeyError``.
    """
    with open(config_file) as stream:
        # NOTE(review): FullLoader is not intended for untrusted input;
        # consider yaml.safe_load if config files may come from outside.
        loaded = yaml.load(stream, Loader=yaml.FullLoader)

    flattened = self.__mapping(loaded)
    for attr, (source_key, new_value) in flattened.items():
        if not hasattr(self, attr):
            raise ConfigurationError(
                f"Unknown key from config file: {source_key}")
        try:
            setattr(self, attr, new_value)
        except KeyError:
            raise ConfigurationError(
                f"Unable to set config attribute: {source_key}")
        else:
            # The new value has not been validated yet.
            self.attr_checked[attr] = False

    self.is_completed = False
def handle_single_dataset(self, context):
    """Validate the ``single_dataset__*`` settings and pre-validate the data.

    Runs the loader's pre-load validation on the datapath, reports the load
    through ``context["messagefn"]`` and checks that
    ``single_dataset__about`` (if given) is an absolute URL.

    Raises:
        ConfigurationError: on a type mismatch, when the dataset cannot be
            accessed, or when the about URL is not absolute.
    """
    optional_str = (str, type(None))
    for attr_name in (
            "single_dataset__datapath",
            "single_dataset__title",
            "single_dataset__about",
            "single_dataset__obs_names",
            "single_dataset__var_names",
    ):
        self.validate_correct_type_of_configuration_attribute(
            attr_name, optional_str)

    # preload this data set
    loader = MatrixDataLoader(self.single_dataset__datapath,
                              app_config=self.app_config)
    try:
        loader.pre_load_validation()
    except DatasetAccessError as e:
        raise ConfigurationError(str(e))

    size = loader.file_size()
    short_name = basename(self.single_dataset__datapath)
    if size > BIG_FILE_SIZE_THRESHOLD:
        context["messagefn"](
            f"Loading data from {short_name}, this may take a while...")
    else:
        context["messagefn"](f"Loading data from {short_name}.")

    about = self.single_dataset__about
    if about:
        # An absolute URL has both a scheme and a network location.
        try:
            parts = urlparse(about)
            is_absolute = bool(parts.scheme and parts.netloc)
        except ValueError:
            is_absolute = False
        if not is_absolute:
            raise ConfigurationError(
                "Must provide an absolute URL for --about. (Example format: http://example.com)"
            )
def update_single_config_from_path_and_value(self, path, value):
    """Update a single config parameter, addressed by a path, with ``value``.

    ``path`` is a list of strings.  The first element selects the target:

    * ``"server"``: the rest of the path names a server config parameter,
      e.g. ``["server", "app", "port"]``.
    * ``"dataset"``: the rest names a default dataset config parameter.
    * ``"per_dataset_config"``: the second element is a dataroot key from
      ``self.dataroot_config`` and the rest names a parameter of that
      dataroot's config.

    The remaining path elements are joined with ``__`` to form the
    parameter name.

    Raises:
        ConfigurationError: if ``path`` is not a list of strings, has an
            unknown first element, names a missing or unknown dataroot, or
            the target rejects the parameter.
    """
    # Any change invalidates a previously completed configuration.  The flag
    # is spelled ``is_completed`` wherever it is read (e.g. ``check_config``);
    # the former ``is_complete`` spelling only created an unused attribute.
    self.is_completed = False

    if not isinstance(path, list) or not all(isinstance(part, str) for part in path):
        raise ConfigurationError(
            f"path must be a list of strings, got '{str(path)}'")

    if len(path) < 1 or path[0] not in ("server", "dataset", "per_dataset_config"):
        raise ConfigurationError(
            "path must start with 'server', 'dataset', or 'per_dataset_config'"
        )

    section = path[0]
    if section == "server":
        updater = self.update_server_config
        attr_parts = path[1:]
    elif section == "dataset":
        updater = self.update_default_dataset_config
        attr_parts = path[1:]
    else:
        if len(path) < 2:
            raise ConfigurationError(
                f"missing dataroot when using per_dataset_config: got '{path}'"
            )
        dataroot = path[1]
        if dataroot not in self.dataroot_config:
            dataroots = str(list(self.dataroot_config.keys()))
            raise ConfigurationError(
                f"unknown dataroot when using per_dataset_config: got '{path}',"
                f" dataroots specified in config are {dataroots}")
        updater = self.dataroot_config[dataroot].update
        attr_parts = path[2:]

    attr = "__".join(attr_parts)
    try:
        updater(**{attr: value})
    except ConfigurationError as e:
        # Re-raise with the full path, keeping the underlying cause attached.
        raise ConfigurationError(
            f"unknown config parameter at path: '{str(path)}'") from e
def __init__(self, app_config, default_config):
    """Initialise this config section from ``default_config``.

    Reads the ``environment`` entry and the ``region`` and ``secrets``
    entries of ``aws_secrets_manager``.

    Raises:
        ConfigurationError: if any of those keys is missing.
    """
    super().__init__(app_config, default_config)
    try:
        self.environment = default_config["environment"]
        secrets_manager = default_config["aws_secrets_manager"]
        self.aws_secrets_manager__region = secrets_manager["region"]
        self.aws_secrets_manager__secrets = secrets_manager["secrets"]
    except KeyError as e:
        raise ConfigurationError(f"Unexpected config: {str(e)}")
def _validate_cookie_params(self):
    """Check ``self.cookie_params`` and raise if something is wrong.

    Nothing is checked when ``self.session_cookie`` is truthy.  Otherwise
    ``self.cookie_params`` must be a dict that contains only the known
    cookie keys and includes ``key`` (the cookie name).

    Raises:
        ConfigurationError: if the cookie params are not a dict, contain an
            unknown key, or lack ``key``.
    """
    if self.session_cookie:
        return

    if not isinstance(self.cookie_params, dict):
        raise ConfigurationError(
            "either session_cookie or cookie must be set")

    valid_keys = {
        "key", "max_age", "expires", "path", "domain", "secure", "httponly",
        "samesite",
    }
    keys = set(self.cookie_params.keys())
    unknown = keys - valid_keys
    if unknown:
        # Sort for a deterministic message, and stringify so that a
        # non-string key is reported instead of breaking ``join``.
        unknown_names = ", ".join(sorted(str(name) for name in unknown))
        raise ConfigurationError(
            f"unexpected key in cookie params: {unknown_names}")

    if "key" not in keys:
        raise ConfigurationError(
            "must have a key (name) in the cookie params")
def add_dataroot_config(self, dataroot_tag, **kw):
    """Create a dataset config for ``dataroot_tag``.

    The new config starts from the current values of the default dataset
    config and is then updated with the ``kw`` parameters.

    Raises:
        ConfigurationError: if a config for the tag already exists, if the
            server's ``multi_dataset__dataroot`` is not a dict, or if the
            tag is not one of its keys.
    """
    if dataroot_tag in self.dataroot_config:
        raise ConfigurationError(
            f"dataroot config already exists: {dataroot_tag}")

    dataroots = self.server_config.multi_dataset__dataroot
    if type(dataroots) is not dict:
        raise ConfigurationError(
            "The server__multi_dataset__dataroot must be a dictionary")
    if dataroot_tag not in dataroots:
        raise ConfigurationError(
            f"The dataroot_tag ({dataroot_tag}) not found in server__multi_dataset__dataroot"
        )

    self.is_completed = False

    new_config = DatasetConfig(dataroot_tag, self,
                               self.default_config["dataset"])
    self.dataroot_config[dataroot_tag] = new_config

    # Seed the new config with the default dataset config's flattened values.
    defaults = self.default_dataset_config
    flattened = defaults.create_mapping(defaults.default_config)
    seed_values = {name: entry[1] for name, entry in flattened.items()}
    new_config.update(**seed_values)

    # Then apply the caller's overrides.
    new_config.update_from_config(kw, dataroot_tag)