def __init__(self, server, catalog_id, config_file, credentials, schema_name=None, table_name=None, verbose=False):
    """Load an ACL configuration file and fetch the catalog model it applies to.

    :param server: host name of the ERMrest server (contacted over https)
    :param catalog_id: identifier of the catalog on that server
    :param config_file: path of a JSON file holding the ACL configuration
    :param credentials: credentials handed to ErmrestCatalog
    :param schema_name: optional schema to restrict the work to
    :param table_name: optional table (within schema_name) to restrict the work to
    :param verbose: when true, progress messages are printed
    """
    # Use a context manager so the file handle is closed even when the JSON is malformed.
    with open(config_file) as config_fp:
        self.config = json.load(config_fp)
    self.ignored_schema_patterns = []
    self.verbose = verbose
    ip = self.config.get("ignored_schema_patterns")
    if ip is not None:
        for p in ip:
            self.ignored_schema_patterns.append(re.compile(p))
    # The catalog ACL is stored as-is; every other ACL type becomes a spec list.
    self.acl_specs = {"catalog_acl": self.config.get("catalog_acl")}
    for key in self.ACL_TYPES:
        if key != "catalog_acl":
            self.acl_specs[key] = self.make_speclist(key)
    self.groups = self.config.get("groups")
    self.expand_groups()
    self.acl_definitions = self.config.get("acl_definitions")
    self.expand_acl_definitions()
    self.acl_bindings = self.config.get("acl_bindings")
    self.invalidate_bindings = self.config.get("invalidate_bindings")
    self.server = server
    self.catalog_id = catalog_id
    # Two separate snapshots of the configuration are fetched: the "saved" one is
    # left untouched, the other one is the copy that gets edited.
    old_catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.saved_toplevel_config = ConfigUtil.find_toplevel_node(
        old_catalog.getCatalogConfig(), schema_name, table_name)
    self.catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.toplevel_config = ConfigUtil.find_toplevel_node(
        self.catalog.getCatalogConfig(), schema_name, table_name)
class AclConfig:
    """Apply ACLs and ACL bindings described in a JSON config file to a catalog model.

    The configuration names groups, ACL definitions and ACL bindings, plus
    per-node specs (catalog, schemas, tables, columns, foreign keys) that say
    which of those apply where.  ``set_acls`` edits the in-memory model and
    ``apply_acls`` pushes the edited model to the server.
    """

    NC_NAME = 'name'
    GC_NAME = 'groups'
    ACL_TYPES = [
        "catalog_acl", "schema_acls", "table_acls", "column_acls",
        "foreign_key_acls"
    ]
    GLOBUS_PREFIX = 'https://auth.globus.org/'

    def __init__(self, server, catalog_id, config_file, credentials, schema_name=None, table_name=None,
                 verbose=False):
        """Load the configuration file and fetch the catalog model it applies to.

        :param server: host name of the ERMrest server (contacted over https)
        :param catalog_id: identifier of the catalog on that server
        :param config_file: path of a JSON file holding the ACL configuration
        :param credentials: credentials handed to ErmrestCatalog
        :param schema_name: optional schema to restrict the work to
        :param table_name: optional table (within schema_name) to restrict the work to
        :param verbose: when true, progress messages are printed
        """
        # Use a context manager so the file handle is closed even on malformed JSON.
        with open(config_file) as config_fp:
            self.config = json.load(config_fp)
        self.verbose = verbose
        self.ignored_schema_patterns = [
            re.compile(p) for p in self.config.get("ignored_schema_patterns") or []
        ]
        # The catalog ACL is stored as-is; every other ACL type becomes a spec list.
        self.acl_specs = {"catalog_acl": self.config.get("catalog_acl")}
        for key in self.ACL_TYPES:
            if key != "catalog_acl":
                self.acl_specs[key] = self.make_speclist(key)
        self.groups = self.config.get("groups")
        self.expand_groups()
        self.acl_definitions = self.config.get("acl_definitions")
        self.expand_acl_definitions()
        self.acl_bindings = self.config.get("acl_bindings")
        self.invalidate_bindings = self.config.get("invalidate_bindings")
        self.server = server
        self.catalog_id = catalog_id
        # Two separate snapshots are fetched: the "saved" one stays untouched and is
        # handed to apply() together with the edited one (see apply_acls).
        old_catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
        self.saved_toplevel_config = ConfigUtil.find_toplevel_node(
            old_catalog.getCatalogConfig(), schema_name, table_name)
        self.catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
        self.toplevel_config = ConfigUtil.find_toplevel_node(
            self.catalog.getCatalogConfig(), schema_name, table_name)

    def make_speclist(self, name):
        """Wrap the config section called *name* (or an empty dict) in an ACLSpecList."""
        d = self.config.get(name)
        if d is None:
            d = dict()
        return ACLSpecList(d)

    def add_node_acl(self, node, acl_name):
        """Copy every entry of the ACL definition *acl_name* into ``node.acls``.

        :raises ValueError: if no ACL definition has that name
        """
        # A config without "acl_definitions" simply has no definitions.
        acl = (self.acl_definitions or {}).get(acl_name)
        if acl is None:
            raise ValueError(
                "no acl set called '{name}'".format(name=acl_name))
        for k in acl.keys():
            node.acls[k] = acl[k]

    def add_node_acl_binding(self, node, table_node, binding_name):
        """Expand the binding *binding_name* and store it in ``node.acl_bindings``.

        :param node: model node receiving the binding
        :param table_node: table used to resolve 'outbound_col' projections (may be None)
        :raises ValueError: if no binding has that name
        :raises NoForeignKeyError: if a projection column has no matching foreign key
        """
        if self.acl_bindings is None or binding_name not in self.acl_bindings:
            raise ValueError(
                "no acl binding called '{name}'".format(name=binding_name))
        binding = self.acl_bindings.get(binding_name)
        try:
            node.acl_bindings[binding_name] = self.expand_acl_binding(
                binding, table_node)
        except NoForeignKeyError:
            # Say which node was being processed before re-raising.
            if isinstance(node, CatalogColumn):
                detail = 'on column {n}'.format(n=node.name)
            elif isinstance(node, CatalogForeignKey):
                detail = 'on foreign key {s}.{n}'.format(s=node.names[0][0],
                                                         n=node.names[0][1])
            else:
                detail = ' {t}'.format(t=type(node))
            print("couldn't expand acl binding {b} {d} table {s}.{t}".format(
                b=binding_name, d=detail, s=table_node.sname,
                t=table_node.name))
            raise

    def expand_acl_binding(self, binding, table_node):
        """Return a copy of *binding* with projections and 'scope_acl' expanded.

        Non-dict bindings are returned unchanged.
        """
        if not isinstance(binding, dict):
            return binding
        new_binding = dict()
        for k in binding.keys():
            if k == "projection":
                new_binding[k] = [
                    self.expand_projection(proj, table_node)
                    for proj in binding.get(k)
                ]
            elif k == "scope_acl":
                # Accept a single group name or a list of names.
                new_binding[k] = self._expand_group_names(binding.get(k))
            else:
                new_binding[k] = binding[k]
        return new_binding

    def expand_projection(self, proj, table_node):
        """Rewrite an 'outbound_col' projection entry as an 'outbound' entry.

        Entries that are not dicts are returned unchanged.

        :raises NotImplementedError: if 'outbound_col' is not the first key of
            the entry, or if there is no table to resolve it against
        """
        if not isinstance(proj, dict):
            return proj
        new_proj = dict()
        is_first_outbound = True
        for k in proj.keys():
            if k == "outbound_col":
                if not is_first_outbound:
                    raise NotImplementedError(
                        "don't know how to expand 'outbound_col' on anything but the first entry in a projection; "
                        "use 'outbound' instead")
                if table_node is None:
                    raise NotImplementedError(
                        "don't know how to expand 'outbound_col' in a foreign key acl/annotation; use 'outbound' "
                        "instead")
                new_proj["outbound"] = self.expand_projection_column(
                    proj[k], table_node)
                if new_proj["outbound"] is None:
                    return None
            else:
                new_proj[k] = proj[k]
            is_first_outbound = False
        return new_proj

    def expand_projection_column(self, col_name, table_node):
        """Return the name of the single-column foreign key on *col_name*.

        :raises NoForeignKeyError: if *table_node* has no such foreign key
        """
        for fkey in table_node.foreign_keys:
            if len(fkey.foreign_key_columns) != 1:
                continue
            col = fkey.foreign_key_columns[0]
            if (col.get("table_name") == table_node.name
                    and col.get("schema_name") == table_node.sname
                    and col.get("column_name") == col_name):
                return fkey.names[0]
        raise NoForeignKeyError(
            "can't find foreign key for column {s}.{t}({c})".format(
                s=table_node.sname, t=table_node.name, c=col_name))

    def set_node_acl_bindings(self, node, table_node, binding_list, invalidate_list):
        """Replace the bindings of *node*.

        Names in *binding_list* are expanded and set; names in
        *invalidate_list* are set to ``False``.

        :raises ValueError: if a name appears in both lists
        """
        node.acl_bindings.clear()
        if binding_list is not None:
            for binding_name in binding_list:
                self.add_node_acl_binding(node, table_node, binding_name)
        if invalidate_list is not None:
            for binding_name in invalidate_list:
                if binding_list and binding_name in binding_list:
                    # Foreign keys are identified by "names" rather than "name".
                    node_label = getattr(node, "name", None)
                    if node_label is None:
                        node_label = getattr(node, "names", node)
                    raise ValueError(
                        "Binding {b} appears in both acl_bindings and invalidate_bindings for table {s}.{t} node {n}"
                        .format(b=binding_name, s=table_node.sname,
                                t=table_node.name, n=node_label))
                node.acl_bindings[binding_name] = False

    def save_groups(self):
        """Upsert one row per configured group into the group-list table, if one is configured."""
        glt = self.create_or_validate_group_table()
        if glt is not None and self.groups is not None:
            rows = []
            for name in self.groups.keys():
                row = {self.NC_NAME: name, self.GC_NAME: self.groups.get(name)}
                # These columns, when the table has them, are sent as null.
                for c in ['RCB', 'RMB']:
                    if glt.getColumn(c) is not None:
                        row[c] = None
                rows.append(row)
            glt.upsertRows(self.catalog, rows)

    def create_or_validate_schema(self, schema_name):
        """Return the schema document for *schema_name*, creating the schema if absent."""
        schema = self.catalog.getCatalogSchema()['schemas'].get(schema_name)
        if schema is None:
            self.catalog.post("/schema/{s}".format(s=schema_name))
            schema = self.catalog.getCatalogSchema()['schemas'].get(schema_name)
        return schema

    def create_table(self, schema_name, table_name, table_spec, comment=None):
        """Create a table and return its document from the refreshed schema.

        *table_spec* is filled in (in place) with the schema name, table name,
        a default kind of 'table' and, if it has no comment, *comment*.
        Returns None without doing anything when *schema_name* is None.
        """
        if table_spec is None:
            table_spec = dict()
        if schema_name is None:
            return None
        table_spec["schema_name"] = schema_name
        table_spec["table_name"] = table_name
        if table_spec.get('comment') is None and comment is not None:
            table_spec['comment'] = comment
        if table_spec.get('kind') is None:
            table_spec['kind'] = 'table'
        self.catalog.post("/schema/{s}/table".format(s=schema_name),
                          json=table_spec)
        schema = self.catalog.getCatalogSchema()['schemas'].get(schema_name)
        return schema['tables'].get(table_name)

    def _group_table_spec(self, sname, tname):
        """Build the table definition used when the group-list table must be created."""
        return {
            'comment':
            "Named lists of groups used in ACLs. Maintained by the rbk_acls program. "
            "Do not update this table manually.",
            'annotations': {
                'tag:isrd.isi.edu,2016:generated': None
            },
            'column_definitions': [{
                'name': self.NC_NAME,
                'type': {
                    'typename': 'text'
                },
                'nullok': False,
                'comment':
                'Name of grouplist, used in foreign keys. This table is maintained by the rbk_acls '
                'program and should not be updated by hand.'
            }, {
                'name': self.GC_NAME,
                'type': {
                    'base_type': {
                        'typename': 'text'
                    },
                    'is_array': True
                },
                'nullok': True,
                'comment':
                'List of groups. This table is maintained by the rbk_acls program and should not be '
                'updated by hand.'
            }],
            'keys': [{
                'names': [[sname, "{t}_{c}_u".format(t=tname, c=self.NC_NAME)]],
                'unique_columns': [self.NC_NAME]
            }]
        }

    def _validate_group_table(self, glt, sname, tname):
        """Raise ValueError unless an existing group-list table has the required columns and key."""
        name_col = glt.getColumn(self.NC_NAME)
        if name_col is None:
            raise ValueError(
                'table specified for group lists ({s}.{t}) lacks a "{n}" column'
                .format(s=sname, t=tname, n=self.NC_NAME))
        if name_col.get('nullok'):
            raise ValueError(
                "{n} column in group list table ({s}.{t}) allows nulls".
                format(n=self.NC_NAME, s=sname, t=tname))
        # The name column must be a key all by itself.
        name_is_key = any(
            list(key.get('unique_columns')) == [self.NC_NAME]
            for key in glt.get('keys'))
        if not name_is_key:
            raise ValueError(
                "{n} column in group list table ({s}.{t}) is not a key".
                format(n=self.NC_NAME, s=sname, t=tname))
        if glt.getColumn(self.GC_NAME) is None:
            raise ValueError(
                'table specified for group lists ({s}.{t}) lacks a "{n}" column'
                .format(s=sname, t=tname, n=self.GC_NAME))

    def create_or_validate_group_table(self):
        """Return the group-list table, creating it (and its schema) when missing.

        Returns None when the configuration has no 'group_list_table' entry.

        :raises ValueError: if the entry lacks a schema or table name, or an
            existing table does not have the expected columns and key
        """
        glt_spec = self.config.get('group_list_table')
        if glt_spec is None:
            return None
        sname = glt_spec.get('schema')
        tname = glt_spec.get('table')
        if sname is None or tname is None:
            raise ValueError("group_list_table missing schema or table")
        schema = self.create_or_validate_schema(sname)
        assert schema is not None
        glt = Table(schema['tables'].get(tname))
        if glt == {}:
            glt = Table(self.create_table(sname, tname, self._group_table_spec(sname, tname)))
        else:
            self._validate_group_table(glt, sname, tname)
        if glt == {}:
            return None
        return glt

    def set_node_acl(self, node, spec):
        """Clear ``node.acls`` and, if *spec* names an ACL, install that definition."""
        node.acls.clear()
        acl_name = spec.get("acl")
        if acl_name is not None:
            self.add_node_acl(node, acl_name)

    def expand_groups(self):
        """Flatten every configured group so that it lists only concrete group identifiers."""
        if self.groups is None:
            # No "groups" section in the configuration: nothing to expand.
            return
        for group_name in self.groups.keys():
            self.expand_group(group_name)

    def get_group(self, group_name):
        """Return the members of a named group, or ``[group_name]`` if it is not a configured group."""
        group = None
        if self.groups is not None:
            group = self.groups.get(group_name)
        if group is None:
            group = [group_name]
        return group

    def _expand_group_names(self, raw_groups):
        """Resolve one group name, or a list of them, to a new flat list of identifiers."""
        if isinstance(raw_groups, list):
            expanded = []
            for group_name in raw_groups:
                expanded.extend(self.get_group(group_name))
            return expanded
        return list(self.get_group(raw_groups))

    def validate_group(self, group):
        """Check that *group* is '*' or looks like a Globus group URI.

        :raises ValueError: if the format is not recognised or the URI is malformed
        """
        if group == '*':
            return
        elif group.startswith(self.GLOBUS_PREFIX):
            self.validate_globus_group(group)
        else:
            raise ValueError(
                "Can't determine format of group '{g}'".format(g=group))

    def validate_globus_group(self, group):
        """Check that the part of *group* after the Globus prefix parses as a UUID.

        :raises ValueError: if it does not
        """
        guid = group[len(self.GLOBUS_PREFIX):]
        try:
            UUID(guid)
        except ValueError:
            raise ValueError(
                "Group '{g}' appears to be a malformed Globus group".format(
                    g=group))
        if self.verbose:
            print(
                "group '{g}' appears to be a syntactically-correct Globus group"
                .format(g=group))

    def expand_group(self, group_name, _ancestors=()):
        """Replace the members of *group_name* with the de-duplicated list of concrete groups.

        Members that are themselves configured groups are expanded recursively;
        all other members are validated and kept.

        :param _ancestors: internal; names of the groups currently being expanded
        :raises ValueError: if a group is (directly or indirectly) a member of
            itself, or a member is not a recognised group identifier
        """
        if group_name in _ancestors:
            # Without this check a cyclic definition would recurse without bound.
            raise ValueError(
                "group '{g}' is defined in terms of itself".format(g=group_name))
        lineage = _ancestors + (group_name,)
        groups = []
        for child_name in self.groups.get(group_name):
            child = self.groups.get(child_name)
            if child is None:
                self.validate_group(child_name)
                groups.append(child_name)
            else:
                self.expand_group(child_name, lineage)
                groups = groups + self.groups[child_name]
        self.groups[group_name] = list(set(groups))

    def expand_acl_definitions(self):
        """Expand the group names used in every ACL definition."""
        if self.acl_definitions is None:
            # No "acl_definitions" section in the configuration: nothing to expand.
            return
        for acl_name in self.acl_definitions.keys():
            self.expand_acl_definition(acl_name)

    def expand_acl_definition(self, acl_name):
        """Replace, for each operation of one ACL definition, group names with their members."""
        spec = self.acl_definitions.get(acl_name)
        for op_type in spec.keys():
            spec[op_type] = self._expand_group_names(spec[op_type])

    def set_table_acls(self, table):
        """Reset the ACLs and bindings of a table, then of its columns and foreign keys."""
        spec = self.acl_specs["table_acls"].find_best_table_spec(
            table.sname, table.name)
        table.acls.clear()
        table.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(table, spec)
            self.set_node_acl_bindings(table, table,
                                       spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set table {s}.{t} acls to {a}, bindings to {b}".format(
                s=table.sname,
                t=table.name,
                a=str(table.acls),
                b=str(table.acl_bindings)))
        for column in table.column_definitions:
            self.set_column_acls(column, table)
        for fkey in table.foreign_keys:
            self.set_fkey_acls(fkey, table)

    def set_column_acls(self, column, table):
        """Reset the ACLs and bindings of one column from the best matching spec."""
        spec = self.acl_specs["column_acls"].find_best_column_spec(
            column.sname, column.tname, column.name)
        column.acls.clear()
        column.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(column, spec)
            self.set_node_acl_bindings(column, table,
                                       spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set column {s}.{t}.{c} acls to {a}, bindings to {b}".format(
                s=column.sname,
                t=column.tname,
                c=column.name,
                a=str(column.acls),
                b=str(column.acl_bindings)))

    def set_fkey_acls(self, fkey, table):
        """Reset the ACLs and bindings of one foreign key from the best matching spec."""
        spec = self.acl_specs["foreign_key_acls"].find_best_foreign_key_spec(
            fkey.sname, fkey.tname, fkey.names)
        fkey.acls.clear()
        fkey.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(fkey, spec)
            # Pass the invalidate list too, as the table and column variants do;
            # set_node_acl_bindings requires it.
            self.set_node_acl_bindings(fkey, table,
                                       spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set fkey {f} acls to {a}, bindings to {b}".format(
                f=str(fkey.names),
                a=str(fkey.acls),
                b=str(fkey.acl_bindings)))

    def set_catalog_acls(self, catalog):
        """Reset the catalog-level ACLs, then process every schema of the top-level config."""
        spec = self.acl_specs["catalog_acl"]
        if spec is not None:
            catalog.acls.clear()
            self.set_node_acl(catalog, spec)
        if self.verbose:
            print("set catalog acls to {a}".format(a=str(catalog.acls)))
        # NOTE(review): schemas are taken from self.toplevel_config, not from the
        # "catalog" argument; set_acls passes the same object for both.
        for schema in self.toplevel_config.schemas.values():
            self.set_schema_acls(schema)

    def set_schema_acls(self, schema):
        """Reset the ACLs of a schema and of its tables, unless the schema name is ignored."""
        for pattern in self.ignored_schema_patterns:
            if pattern.match(schema.name) is not None:
                print("ignoring schema {s}".format(s=schema.name))
                return
        spec = self.acl_specs["schema_acls"].find_best_schema_spec(schema.name)
        schema.acls.clear()
        if spec is not None:
            self.set_node_acl(schema, spec)
        if self.verbose:
            print("set schema {s} acls to {a}".format(s=schema.name,
                                                      a=str(schema.acls)))
        for table in schema.tables.values():
            self.set_table_acls(table)

    def set_acls(self):
        """Set ACLs on the top-level node, dispatching on whether it is a catalog, schema or table.

        :raises ValueError: if the top-level node is of any other type
        """
        if isinstance(self.toplevel_config, ermrest_config.CatalogConfig):
            self.set_catalog_acls(self.toplevel_config)
        elif isinstance(self.toplevel_config, ermrest_config.CatalogSchema):
            self.set_schema_acls(self.toplevel_config)
        elif isinstance(self.toplevel_config, ermrest_config.CatalogTable):
            self.set_table_acls(self.toplevel_config)
        else:
            raise ValueError("toplevel config is a {t}".format(
                t=str(type(self.toplevel_config))))

    def apply_acls(self):
        """Apply the edited model to the catalog, passing the saved snapshot alongside it."""
        self.toplevel_config.apply(self.catalog, self.saved_toplevel_config)

    def dumps(self):
        """Dump a serialized (string) representation of the config.
        """
        return json.dumps(self.toplevel_config.prejson(), indent=2)
class Annotations:
    """Collect the annotations found in a catalog model so they can be dumped as JSON.

    Only annotations selected by ``find_relevant_annotations`` are kept, and
    an annotation that exactly one consolidated (pattern) spec already covers
    with the same value is not repeated.
    """

    def __init__(self, server, catalog, credentials, config):
        """Read the configuration, fetch the catalog model and collect its annotations.

        :param server: host name of the ERMrest server (contacted over https)
        :param catalog: identifier of the catalog on that server
        :param credentials: credentials handed to ErmrestCatalog
        :param config: configuration dict; None is treated as an empty configuration
        :raises ValueError: if an annotation key is both 'managed' and 'to_delete'
        """
        # Normalise up front so that every later lookup can rely on a dict.
        if config is None:
            config = {}
        self.annotations = {}
        self.ignored_schema_patterns = [
            re.compile(p) for p in config.get("ignored_schema_patterns") or []
        ]
        self.types = set()
        self.managed_attributes = []
        self.ignore_unmanaged = True
        self.ignored_attributes = []
        self.annotations_to_delete = []

        known_attributes = config.get("known_attributes")
        if known_attributes is not None:
            self.managed_attributes = known_attributes.get("managed", [])
            self.ignored_attributes = known_attributes.get("ignored", [])
            self.annotations_to_delete = known_attributes.get("to_delete", [])
            self.ignore_unmanaged = known_attributes.get("ignore_all_unmanaged", True)
            self.annotations["known_attributes"] = known_attributes
        else:
            self.annotations["known_attributes"] = {'managed': [], 'ignore_all_unmanaged': True}

        # Work on a shallow copy: the entries are replaced by AttrSpecList objects
        # below and the caller's configuration should not be altered by that.
        self.consolidated_annotations = dict(config.get("consolidated_annotations") or {})
        for k in AttrSpecList.SPEC_TYPES:
            d = self.consolidated_annotations.get(k)
            if d is None:
                d = [dict()]
            self.consolidated_annotations[k] = AttrSpecList(known_attributes, d)
            self.annotations[k] = self.munge_specs(self.consolidated_annotations[k])

        # "managed" may be an explicit null (see find_relevant_annotations).
        for k in self.managed_attributes or []:
            if k in self.annotations_to_delete:
                raise ValueError("{k} is both 'managed' and 'to_delete'".format(k=k))

        self.catalog = ErmrestCatalog('https', server, catalog, credentials)
        self.catalog_config = self.catalog.getCatalogConfig()
        if self.catalog_config.annotations is not None:
            self.add_catalog_annotations(self.catalog_config)
        if self.catalog_config.schemas is not None:
            for s in self.catalog_config.schemas.values():
                self.add_schema_annotations(s)

    def munge_specs(self, annotation_list):
        """Return the ``config_format()`` of every spec in *annotation_list* (an empty list for None)."""
        if annotation_list is None:
            return []
        if isinstance(annotation_list, AttrSpecList):
            annotation_list = annotation_list.get_specs()
        return [spec.config_format() for spec in annotation_list]

    def consolidated_schema_annotation(self, annotation):
        """Tell whether a schema annotation is already covered by a consolidated spec."""
        matches = [
            c for c in self.consolidated_annotations["schema_annotations"].get_specs()
            if c.schema_entry_matches(annotation.get("schema"), key=annotation.get("uri"))
        ]
        return self.check_consolidation(matches, annotation.get("value"))

    def consolidated_table_annotation(self, annotation):
        """Tell whether a table annotation is already covered by a consolidated spec."""
        matches = [
            c for c in self.consolidated_annotations["table_annotations"].get_specs()
            if c.table_entry_matches(annotation.get("schema"), annotation.get("table"),
                                     key=annotation.get("uri"))
        ]
        return self.check_consolidation(matches, annotation.get("value"))

    def consolidated_column_annotation(self, annotation):
        """Tell whether a column annotation is already covered by a consolidated spec."""
        matches = [
            c for c in self.consolidated_annotations["column_annotations"].get_specs()
            if c.column_entry_matches(annotation.get("schema"), annotation.get("table"),
                                      annotation.get("column"), key=annotation.get("uri"))
        ]
        return self.check_consolidation(matches, annotation.get("value"))

    def consolidated_foreign_key_annotation(self, annotation):
        """Tell whether a foreign key annotation is already covered by a consolidated spec."""
        matches = [
            c for c in self.consolidated_annotations["foreign_key_annotations"].get_specs()
            if c.foreign_key_entry_matches(annotation.get("schema"), annotation.get("table"),
                                           annotation.get("foreign_key_schema"),
                                           annotation.get("foreign_key"),
                                           key=annotation.get("uri"))
        ]
        return self.check_consolidation(matches, annotation.get("value"))

    def check_consolidation(self, matches, value):
        """Return True only when exactly one spec matched and its value equals *value*."""
        if len(matches) != 1:
            # Zero or more than one matching pattern, so we need the exact spec to disambiguate
            return False
        # NOTE: an "override" flag on the spec (always prefer the pattern match,
        # whatever the original value was) is not implemented.
        return matches[0].get("value") == value

    def add_catalog_annotations(self, catalog):
        """Record the relevant annotations set on the catalog itself."""
        annotations = self.find_relevant_annotations(catalog.annotations)
        if annotations is not None:
            for v in annotations:
                self.annotations["catalog_annotations"].append(v)

    def add_schema_annotations(self, schema):
        """Record the relevant annotations of a schema, then descend into its tables."""
        annotations = self.find_relevant_annotations(schema.annotations)
        if annotations is not None:
            for v in annotations:
                v["schema"] = schema.name
                if not self.consolidated_schema_annotation(v):
                    self.annotations["schema_annotations"].append(v)
        for table in schema.tables.values():
            self.add_table_annotations(table)

    def add_table_annotations(self, table):
        """Record the relevant annotations of a table, then of its columns and foreign keys."""
        annotations = self.find_relevant_annotations(table.annotations)
        if annotations is not None:
            for v in annotations:
                v["schema"] = table.sname
                v["table"] = table.name
                if not self.consolidated_table_annotation(v):
                    self.annotations["table_annotations"].append(v)
        for column in table.column_definitions:
            self.add_column_annotations(table, column)
        for fkey in table.foreign_keys:
            self.add_foreign_key_annotations(fkey)

    def add_column_annotations(self, table, column):
        """Record the relevant annotations of one column."""
        annotations = self.find_relevant_annotations(column.annotations)
        if annotations is not None:
            for v in annotations:
                v["schema"] = table.sname
                v["table"] = table.name
                v["column"] = column.name
                if not self.consolidated_column_annotation(v):
                    self.annotations["column_annotations"].append(v)

    def add_foreign_key_annotations(self, fkey):
        """Record the relevant annotations of one foreign key.

        :raises ValueError: if the foreign key has annotations to record but no name
        """
        annotations = self.find_relevant_annotations(fkey.annotations)
        if annotations is not None:
            if len(fkey.names) < 1:
                raise ValueError("foreign key without a name")
            for v in annotations:
                v["schema"] = fkey.sname
                v["table"] = fkey.tname
                v["foreign_key_schema"] = fkey.names[0][0]
                v["foreign_key"] = fkey.names[0][1]
                if not self.consolidated_foreign_key_annotation(v):
                    self.annotations["foreign_key_annotations"].append(v)

    def find_relevant_annotations(self, annotations):
        """Select the annotations to record, as a list of ``{"uri", "value"}`` dicts.

        When ``managed_attributes`` is None every key not listed in
        ``annotations_to_delete`` is selected; otherwise only keys listed in
        ``managed_attributes`` are.  Selected keys are added to ``self.types``.
        Returns None when nothing is selected.
        """
        if annotations is None or len(annotations) == 0:
            return None
        if self.managed_attributes is None:
            selected = [k for k in annotations.keys() if k not in self.annotations_to_delete]
        else:
            selected = [k for k in annotations.keys() if k in self.managed_attributes]
        if not selected:
            return None
        self.types.update(selected)
        return [{"uri": k, "value": annotations[k]} for k in selected]

    def dumps(self):
        """Return the collected annotations as indented JSON with sorted keys."""
        return json.dumps(self.annotations, indent=4, sort_keys=True)

    def types_list(self):
        """Return the recorded annotation keys as a sorted list."""
        return sorted(self.types)