def __init__(self, server, catalog_id, config_file, credentials, verbose=False, schema_name=None, table_name=None):
    """Load an annotation configuration file and connect to the catalog.

    :param server: host name of the ERMrest server (used with 'https')
    :param catalog_id: identifier of the catalog on that server
    :param config_file: path to a JSON configuration file
    :param credentials: credentials passed through to ``ErmrestCatalog``
    :param verbose: stored on the instance as ``self.verbose``
    :param schema_name: passed to ``ConfigUtil.find_toplevel_node``
    :param table_name: passed to ``ConfigUtil.find_toplevel_node``
    """
    # Use a context manager so the config file is closed as soon as it has
    # been parsed instead of whenever the garbage collector gets to it.
    with open(config_file) as config_fp:
        self.config = json.load(config_fp)

    patterns = self.config.get("ignored_schema_patterns")
    self.ignored_schema_patterns = (
        [re.compile(p) for p in patterns] if patterns is not None else []
    )

    self.known_attrs = self.config.get(u'known_attributes')
    self.managed_annotations = self.known_attrs.get(u'managed')
    self.known_annotations = self.managed_annotations
    self.all_annotations = self.known_annotations
    self.ignored_annotations = self.known_attrs.get(u'ignored')
    if self.ignored_annotations is not None:
        # "+" builds a new list, so managed_annotations is left untouched.
        self.all_annotations = self.all_annotations + self.ignored_annotations
    self.ignore_unmanaged = self.known_attrs.get(u'ignore_all_unmanaged')

    self.annotation_specs = dict()
    for key in AttrSpecList.SPEC_TYPES:
        self.annotation_specs[key] = self.make_speclist(key)

    self.server = server
    self.catalog_id = catalog_id
    self.verbose = verbose

    # The model is fetched twice through two separate catalog objects.
    # NOTE(review): presumably the first copy is kept as an unmodified
    # baseline to compare against when applying changes - confirm.
    old_catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.saved_toplevel_config = ConfigUtil.find_toplevel_node(
        old_catalog.getCatalogModel(), schema_name, table_name)
    self.catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.toplevel_config = ConfigUtil.find_toplevel_node(
        self.catalog.getCatalogModel(), schema_name, table_name)
def main(servername, credentialsfilename, catalog):
    """Set the ``column_display`` annotation of Microscopy.Slide.Label.

    :param servername: host name of the ERMrest server
    :param credentialsfilename: path to a JSON file holding the credentials
    :param catalog: catalog identifier passed to ``ErmrestCatalog``

    Exits with status 1 when the catalog model cannot be fetched or the
    updated configuration cannot be applied.
    """

    def apply(catalog, goal):
        """
        Apply the goal configuration to live catalog

        Conflicts are retried up to 5 times; any other failure is reported
        and ends the attempt. Returns True on success, False otherwise.
        """
        print('applying...')
        counter = 0
        while True:
            try:
                catalog.applyCatalogConfig(goal)
                return True
            except HTTPError as err:
                et, ev, tb = sys.exc_info()
                print(err)
                print(err.errno)
                if err.errno == CONFLICT:
                    print('Conflict Exception "%s"' % str(ev))
                    counter = counter + 1
                    if counter < 5:
                        print('Retrying...')
                        continue
                # Retries exhausted, or an HTTP error that is not a conflict:
                # give up rather than looping forever on it.
                print('%s' % str(traceback.format_exception(et, ev, tb)))
                return False
            except Exception:
                et, ev, tb = sys.exc_info()
                print(str(et))
                print('Exception "%s"' % str(ev))
                print('%s' % str(traceback.format_exception(et, ev, tb)))
                return False

    with open(credentialsfilename) as credentials_file:
        credentials = json.load(credentials_file)
    catalog = ErmrestCatalog('https', servername, catalog, credentials)

    # Try the snake_case accessor first and fall back to the camelCase one.
    try:
        goal = catalog.get_catalog_model()
    except AttributeError:
        try:
            goal = catalog.getCatalogModel()
        except Exception:
            et, ev, tb = sys.exc_info()
            print('got exception "%s"' % str(ev))
            print('%s' % str(traceback.format_exception(et, ev, tb)))
            sys.exit(1)

    schema_name = 'Microscopy'
    table_name = 'Slide'
    column_name = 'Label'

    # The same markdown pattern is used for both display contexts.
    label_pattern = "{{#Label}}:::iframe [](/chaise/PrintLabel.html?label=/microscopy/printer/slide/job?{{{Label}}}){height=75 width=150 style=\"border-style: none; border-color: rgb(153, 153, 153);\" .iframe} \n:::{{/Label}}"
    goal.column(schema_name, table_name, column_name).column_display.update({
        "compact": {"markdown_pattern": label_pattern},
        "detailed": {"markdown_pattern": label_pattern}
    })

    if not apply(catalog, goal):
        # Do not claim success when the configuration was never applied.
        print('Failed to update the URL annotation for the column %s' % column_name)
        sys.exit(1)
    print('Successfully updated the URL annotation for the column %s' % column_name)
def main():
    """Command-line entry point: apply baseline configuration to a catalog.

    With ``--catalog`` the whole catalog is configured through
    ``configure_baseline_catalog``; with ``--table SCHEMA:TABLE`` the named
    table is configured through ``configure_table_defaults``. A
    ``DerivaConfigError`` raised by either step is reported by printing its
    ``msg``.
    """

    def boolean_flag(text):
        """Convert the text 'True'/'False' (any case) into a bool."""
        lowered = text.strip().lower()
        if lowered == 'true':
            return True
        if lowered == 'false':
            return False
        raise argparse.ArgumentTypeError(
            "expected True or False, got {!r}".format(text))

    parser = argparse.ArgumentParser(description="Configure an Ermrest Catalog")
    parser.add_argument('server', help='Catalog server name')
    parser.add_argument('--catalog-id', default=1,
                        help="ID number of desired catalog (Default:1)")
    parser.add_argument('--catalog-name', default=None,
                        help="Name of catalog (Default:hostname)")
    parser.add_argument("--catalog", action='store_true',
                        help='Configure a catalog')
    parser.add_argument("--schema", help='Name of schema to configure')
    parser.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME',
                        help='Name of table to be configured')
    # Command-line values arrive as strings, so without a converter none of
    # them could ever match the boolean choices. Convert first, then validate.
    parser.add_argument('--set-policy', default=True, type=boolean_flag,
                        choices=[True, False],
                        help='Access control policy to be applied to catalog or table')
    parser.add_argument('--reader-group', dest='reader', default=None,
                        help='Group name to use for readers. '
                             'For a catalog named "foo" defaults to foo-reader')
    parser.add_argument('--writer-group', dest='writer', default=None,
                        help='Group name to use for writers. '
                             'For a catalog named "foo" defaults to foo-writer')
    parser.add_argument('--curator-group', dest='curator', default=None,
                        help='Group name to use for curators. '
                             'For a catalog named "foo" defaults to foo-curator')
    parser.add_argument('--admin-group', dest='admin', default=None,
                        help='Group name to use for admins. '
                             'For a catalog named "foo" defaults to foo-admin')
    parser.add_argument('--publish', default=False, action='store_true',
                        help='Make the catalog or table accessible for reading without logging in')

    args = parser.parse_args()

    # Reject a malformed table specification up front with a usage error
    # instead of an unpacking traceback later on.
    table_parts = None
    if args.table:
        table_parts = args.table.split(':')
        if len(table_parts) != 2:
            parser.error('--table must be given as SCHEMA_NAME:TABLE_NAME')

    credentials = get_credential(args.server)
    catalog = ErmrestCatalog('https', args.server, args.catalog_id, credentials=credentials)

    try:
        if args.catalog:
            print('Configuring catalog {}:{}'.format(args.server, args.catalog_id))
            configure_baseline_catalog(catalog, catalog_name=args.catalog_name,
                                       reader=args.reader, writer=args.writer,
                                       curator=args.curator, admin=args.admin,
                                       set_policy=args.set_policy, anonymous=args.publish)
        if table_parts:
            schema_name, table_name = table_parts
            table = catalog.getCatalogModel().schemas[schema_name].tables[table_name]
            configure_table_defaults(catalog, table, set_policy=args.set_policy,
                                     anonymous=args.publish)
    except DerivaConfigError as e:
        print(e.msg)
    return
def init_variables(catalog_num=1):
    """Connect to the PBC catalog and publish handles on the ``__main__`` module.

    :param catalog_num: catalog number to open on the fixed PBC server

    The catalog, its model root, a set of model table objects, the path
    builder and a set of datapath table objects are all stored as attributes
    of ``__main__``. Nothing is returned.
    """
    host = 'pbcconsortium.isrd.isi.edu'
    login = get_credential(host)
    catalog = ErmrestCatalog('https', host, catalog_num, credentials=login)
    model_root = catalog.getCatalogModel()

    __main__.catalog = catalog
    __main__.model_root = model_root

    # (attribute on __main__, schema name, table name) for model objects.
    model_tables = (
        ('Experiment', 'Beta_Cell', 'Experiment'),
        ('Specimen', 'Beta_Cell', 'Specimen'),
        ('Biosample', 'Beta_Cell', 'Biosample'),
        ('Dataset', 'Beta_Cell', 'Dataset'),
        ('imaging_data', 'isa', 'imaging_data'),
        ('model', "viz", 'model'),
    )
    for attr_name, schema_name, table_name in model_tables:
        setattr(__main__, attr_name, model_root.table(schema_name, table_name))

    # DataPath handles: the schemas are resolved first, in a fixed order.
    pb = catalog.getPathBuilder()
    isa = pb.isa
    viz = pb.viz
    vocab = pb.vocab
    beta_cell = pb.Beta_Cell

    __main__.pb = pb
    __main__.isa = isa
    __main__.vocab = vocab

    # (attribute on __main__, schema path, table name) for datapath tables.
    datapath_tables = (
        ('Experiment_dp', beta_cell, 'Experiment'),
        ('Biosample_dp', beta_cell, 'Biosample'),
        ('dataset_dp', beta_cell, 'Dataset'),
        ('XRay_Tomography_dp', beta_cell, 'XRay_Tomography_Data'),
        ('Specimen_dp', beta_cell, 'Specimen'),
        ('model_dp', viz, 'model'),
    )
    for attr_name, schema_path, table_name in datapath_tables:
        setattr(__main__, attr_name, getattr(schema_path, table_name))
def main():
    """Command-line entry point: optionally create an asset table.

    Parses the server, catalog id, ``--table SCHEMA_NAME:TABLE_NAME``,
    ``--asset-table KEY_COLUMN`` and ``--config`` options. When a table is
    given it is looked up in the catalog model; when ``--asset-table`` is also
    given, ``create_asset_table`` is called for that table. Asking for an
    asset table without naming a table prints a message and exits with 1.
    """
    cli = argparse.ArgumentParser(description="Configure an Ermrest Catalog")
    cli.add_argument('server', help='Catalog server name')
    cli.add_argument('--catalog-id', default=1,
                     help="ID number of desired catalog (Default:1)")
    cli.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME',
                     help='Name of table to be configured')
    cli.add_argument('--asset-table', default=None, metavar='KEY_COLUMN',
                     help='Create an asset table linked to table on key_column')
    cli.add_argument('--config', default=None,
                     help='python script to set up configuration variables)')
    args = cli.parse_args()

    login = get_credential(args.server)
    catalog = ErmrestCatalog('https', args.server, args.catalog_id,
                             credentials=login)

    if args.table:
        schema_name, table_name = args.table.split(':')
        model = catalog.getCatalogModel()
        table = model.schemas[schema_name].tables[table_name]

    # Nothing further to do unless an asset table was requested.
    if not args.asset_table:
        return
    if not args.table:
        print('Creating asset table requires specification of a table')
        exit(1)
    create_asset_table(catalog, table, args.asset_table)
def __init__(self, server, catalog_id, config_file, credentials, schema_name=None, table_name=None, verbose=False):
    """Load an ACL configuration file and connect to the catalog.

    :param server: host name of the ERMrest server (used with 'https')
    :param catalog_id: identifier of the catalog on that server
    :param config_file: path to a JSON configuration file
    :param credentials: credentials passed through to ``ErmrestCatalog``
    :param schema_name: passed to ``ConfigUtil.find_toplevel_node``
    :param table_name: passed to ``ConfigUtil.find_toplevel_node``
    :param verbose: stored on the instance as ``self.verbose``
    """
    # Use a context manager so the config file is closed as soon as it has
    # been parsed instead of whenever the garbage collector gets to it.
    with open(config_file) as config_fp:
        self.config = json.load(config_fp)

    self.verbose = verbose
    self.server = server
    self.catalog_id = catalog_id

    patterns = self.config.get("ignored_schema_patterns")
    self.ignored_schema_patterns = (
        [re.compile(p) for p in patterns] if patterns is not None else []
    )

    # The catalog ACL spec is used as-is; every other ACL type gets a
    # spec list built by make_speclist().
    self.acl_specs = {"catalog_acl": self.config.get("catalog_acl")}
    for key in self.ACL_TYPES:
        if key != "catalog_acl":
            self.acl_specs[key] = self.make_speclist(key)

    # Groups must be expanded before the ACL definitions that refer to them.
    self.groups = self.config.get("groups")
    self.expand_groups()
    self.acl_definitions = self.config.get("acl_definitions")
    self.expand_acl_definitions()
    self.acl_bindings = self.config.get("acl_bindings")
    self.invalidate_bindings = self.config.get("invalidate_bindings")

    # The model is fetched twice through two separate catalog objects.
    # NOTE(review): presumably the first copy is kept as an unmodified
    # baseline to compare against when applying changes - confirm.
    old_catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.saved_toplevel_config = ConfigUtil.find_toplevel_node(
        old_catalog.getCatalogModel(), schema_name, table_name)
    self.catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
    self.toplevel_config = ConfigUtil.find_toplevel_node(
        self.catalog.getCatalogModel(), schema_name, table_name)
from deriva.core import HatracStore, ErmrestCatalog, get_credential, DerivaPathError
import deriva.core.ermrest_model as em
import csv
import re

# Load basic data elements from CSV file for initial XRAY-Tomography Run.
# NOTE(review): no CSV loading is visible in this portion of the script; it
# only sets up the connection and table handles - presumably the loading
# follows further down. Confirm.

# Create connection to the PBC server
# Everything below runs at import time and contacts the server (catalog 1).
server = 'pbcconsortium.isrd.isi.edu'
credential = get_credential(server)
catalog = ErmrestCatalog('https', server, 1, credentials=credential)
model_root = catalog.getCatalogModel()

# Get references to main tables for manipulating the model.
# Each lookup is (schema name, table name) against the fetched model.
experiment = model_root.table('isa', 'experiment')
biosample = model_root.table('isa', 'biosample')
dataset = model_root.table('isa', 'dataset')
protocol = model_root.table('isa','protocol')
replicate = model_root.table('isa','replicate')
imaging_data = model_root.table('isa','imaging_data')
model = model_root.table("viz", 'model')

# Get references to the main tables for managing their contents using DataPath library
pb = catalog.getPathBuilder()

# Get main schema
isa = pb.isa
viz = pb.viz
class AclConfig:
    """Apply ACLs and ACL bindings from a JSON configuration to a catalog model.

    The configuration supplies named group lists (``groups``), named ACL sets
    (``acl_definitions``), named ACL bindings (``acl_bindings``) and, per
    node type, specs that say which ACL set and bindings a catalog, schema,
    table, column or foreign key should receive.
    """

    # Column names used in the optional group-list table.
    NC_NAME = 'name'
    GC_NAME = 'groups'
    ACL_TYPES = [
        "catalog_acl", "schema_acls", "table_acls", "column_acls",
        "foreign_key_acls"
    ]
    GLOBUS_PREFIX = 'https://auth.globus.org/'
    ROBOT_PREFIX_FORMAT = 'https://{server}/webauthn_robot/'

    def __init__(self, server, catalog_id, config_file, credentials,
                 schema_name=None, table_name=None, verbose=False):
        """Load the configuration file and fetch the catalog model.

        :param server: host name of the ERMrest server (used with 'https')
        :param catalog_id: identifier of the catalog on that server
        :param config_file: path to a JSON configuration file
        :param credentials: credentials passed through to ``ErmrestCatalog``
        :param schema_name: passed to ``ConfigUtil.find_toplevel_node``
        :param table_name: passed to ``ConfigUtil.find_toplevel_node``
        :param verbose: when true, progress messages are printed
        """
        # Close the config file deterministically once it has been parsed.
        with open(config_file) as config_fp:
            self.config = json.load(config_fp)
        self.verbose = verbose
        self.server = server
        self.catalog_id = catalog_id

        patterns = self.config.get("ignored_schema_patterns")
        self.ignored_schema_patterns = (
            [re.compile(p) for p in patterns] if patterns is not None else []
        )

        # The catalog ACL spec is used as-is; the other types get spec lists.
        self.acl_specs = {"catalog_acl": self.config.get("catalog_acl")}
        for key in self.ACL_TYPES:
            if key != "catalog_acl":
                self.acl_specs[key] = self.make_speclist(key)

        # Groups must be expanded before the ACL definitions that use them.
        self.groups = self.config.get("groups")
        self.expand_groups()
        self.acl_definitions = self.config.get("acl_definitions")
        self.expand_acl_definitions()
        self.acl_bindings = self.config.get("acl_bindings")
        self.invalidate_bindings = self.config.get("invalidate_bindings")

        # Two independent copies of the model: the saved one stays untouched
        # and is handed to apply() in apply_acls(); the other one is edited.
        old_catalog = ErmrestCatalog('https', self.server, self.catalog_id,
                                     credentials)
        self.saved_toplevel_config = ConfigUtil.find_toplevel_node(
            old_catalog.getCatalogModel(), schema_name, table_name)
        self.catalog = ErmrestCatalog('https', self.server, self.catalog_id,
                                      credentials)
        self.toplevel_config = ConfigUtil.find_toplevel_node(
            self.catalog.getCatalogModel(), schema_name, table_name)

    def make_speclist(self, name):
        """Wrap the config section ``name`` (or an empty dict) in an ACLSpecList."""
        d = self.config.get(name)
        if d is None:
            d = dict()
        return ACLSpecList(d)

    def add_node_acl(self, node, acl_name):
        """Copy the named ACL set onto ``node.acls``.

        :raises ValueError: if no ACL set called ``acl_name`` is defined
        """
        # Treat a missing "acl_definitions" section as "nothing defined".
        acl = (self.acl_definitions or {}).get(acl_name)
        if acl is None:
            raise ValueError(
                "no acl set called '{name}'".format(name=acl_name))
        for k in acl.keys():
            node.acls[k] = acl[k]

    def add_node_acl_binding(self, node, table_node, binding_name):
        """Expand the named binding and store it on ``node.acl_bindings``.

        :raises ValueError: if no binding called ``binding_name`` is defined
        :raises NoForeignKeyError: re-raised after printing which node and
            table the binding could not be expanded for
        """
        # Treat a missing "acl_bindings" section as "nothing defined".
        bindings = self.acl_bindings or {}
        if binding_name not in bindings:
            raise ValueError(
                "no acl binding called '{name}'".format(name=binding_name))
        binding = bindings.get(binding_name)
        try:
            node.acl_bindings[binding_name] = self.expand_acl_binding(
                binding, table_node)
        except NoForeignKeyError:
            detail = ''
            if isinstance(node, ermrest_model.Column):
                detail = 'on column {n}'.format(n=node.name)
            elif isinstance(node, ermrest_model.ForeignKey):
                detail = 'on foreign key {s}.{n}'.format(s=node.names[0][0],
                                                         n=node.names[0][1])
            else:
                detail = ' {t}'.format(t=type(node))
            print("couldn't expand acl binding {b} {d} table {s}.{t}".format(
                b=binding_name,
                d=detail,
                s=table_node.schema.name,
                t=table_node.name))
            # Bare raise keeps the original traceback intact.
            raise

    def expand_acl_binding(self, binding, table_node):
        """Return a copy of ``binding`` with projections and scope_acl expanded.

        Non-dict bindings are returned unchanged.
        """
        if not isinstance(binding, dict):
            return binding
        new_binding = dict()
        for k in binding.keys():
            if k == "projection":
                new_binding[k] = []
                for proj in binding.get(k):
                    new_binding[k].append(
                        self.expand_projection(proj, table_node))
            elif k == "scope_acl":
                new_binding[k] = self.get_group(binding.get(k))
            else:
                new_binding[k] = binding[k]
        return new_binding

    def expand_projection(self, proj, table_node):
        """Replace an ``outbound_col`` entry by the matching ``outbound`` key name.

        Non-dict projection elements are returned unchanged.

        :raises NotImplementedError: if ``outbound_col`` is not the first key
            of the element, or if ``table_node`` is None
        """
        if isinstance(proj, dict):
            new_proj = dict()
            is_first_outbound = True
            for k in proj.keys():
                if k == "outbound_col":
                    if is_first_outbound:
                        is_first_outbound = False
                    else:
                        raise NotImplementedError(
                            "don't know how to expand 'outbound_col' on anything but the first entry in a projection; "
                            "use 'outbound' instead")
                    if table_node is None:
                        raise NotImplementedError(
                            "don't know how to expand 'outbound_col' in a foreign key acl/annotation; use 'outbound' "
                            "instead")
                    new_proj["outbound"] = self.expand_projection_column(
                        proj[k], table_node)
                    if new_proj["outbound"] is None:
                        return None
                else:
                    new_proj[k] = proj[k]
                    is_first_outbound = False
            return new_proj
        else:
            return proj

    def expand_projection_column(self, col_name, table_node):
        """Return the name of the single-column foreign key on ``col_name``.

        :raises NoForeignKeyError: if ``table_node`` has no single-column
            foreign key whose column is ``col_name``
        """
        for fkey in table_node.foreign_keys:
            if len(fkey.foreign_key_columns) == 1:
                col = fkey.foreign_key_columns[0]
                if col.get("table_name") == table_node.name and col.get(
                        "schema_name") == table_node.schema.name and col.get(
                            "column_name") == col_name:
                    return fkey.names[0]
        # Build the message here: the values are now actually interpolated
        # instead of being passed along as extra exception arguments.
        raise NoForeignKeyError(
            "can't find foreign key for column {s}.{t}({c})".format(
                s=table_node.schema.name, t=table_node.name, c=col_name))

    def set_node_acl_bindings(self, node, table_node, binding_list,
                              invalidate_list):
        """Replace the ACL bindings of ``node``.

        :param binding_list: names of bindings to expand and set, or None
        :param invalidate_list: names of bindings to set to False, or None
        :raises ValueError: if a name appears in both lists
        """
        node.acl_bindings.clear()
        if binding_list is not None:
            for binding_name in binding_list:
                self.add_node_acl_binding(node, table_node, binding_name)
        if invalidate_list is not None:
            for binding_name in invalidate_list:
                if binding_list and binding_name in binding_list:
                    # Foreign keys are identified by "names" rather than
                    # "name" elsewhere in this class, so fall back
                    # gracefully when building the message.
                    node_label = getattr(node, "name",
                                         getattr(node, "names", node))
                    raise ValueError(
                        "Binding {b} appears in both acl_bindings and invalidate_bindings for table {s}.{t} node {n}"
                        .format(b=binding_name,
                                s=table_node.schema.name,
                                t=table_node.name,
                                n=node_label))
                node.acl_bindings[binding_name] = False

    def save_groups(self):
        """Upsert the expanded group lists into the group-list table, if any."""
        glt = self.create_or_validate_group_table()
        if glt is not None and self.groups is not None:
            rows = []
            for name in self.groups.keys():
                row = {'name': name, 'groups': self.groups.get(name)}
                for c in ['RCB', 'RMB']:
                    if glt.getColumn(c) is not None:
                        row[c] = None
                rows.append(row)

            glt.upsertRows(self.catalog, rows)

    def create_or_validate_schema(self, schema_name):
        """Create schema ``schema_name`` if it is missing and return its definition."""
        schema = self.catalog.getCatalogSchema()['schemas'].get(schema_name)
        if schema is None:
            self.catalog.post("/schema/{s}".format(s=schema_name))
        return self.catalog.getCatalogSchema()['schemas'].get(schema_name)

    def create_table(self, schema_name, table_name, table_spec, comment=None):
        """Create a table from ``table_spec`` and return its definition.

        Returns None without creating anything when ``schema_name`` is None.
        ``table_spec`` is filled in with the schema name, table name, a
        default kind of 'table' and, if it has none, the given comment.
        """
        if table_spec is None:
            table_spec = dict()
        if schema_name is None:
            return None
        table_spec["schema_name"] = schema_name
        table_spec["table_name"] = table_name
        if table_spec.get('comment') is None and comment is not None:
            table_spec['comment'] = comment
        if table_spec.get('kind') is None:
            table_spec['kind'] = 'table'
        self.catalog.post("/schema/{s}/table".format(s=schema_name),
                          json=table_spec)
        schema = self.catalog.getCatalogSchema()['schemas'].get(schema_name)
        return schema['tables'].get(table_name)

    def create_or_validate_group_table(self):
        """Return the group-list table, creating or validating it as needed.

        Returns None when the configuration has no ``group_list_table``
        section.

        :raises ValueError: if the section lacks a schema or table name, or
            if an existing table lacks the name column, allows nulls in it,
            has no single-column key on it, or lacks the groups column
        """
        glt_spec = self.config.get('group_list_table')
        if glt_spec is None:
            return None
        sname = glt_spec.get('schema')
        tname = glt_spec.get('table')
        if sname is None or tname is None:
            raise ValueError("group_list_table missing schema or table")
        schema = self.create_or_validate_schema(sname)
        assert schema is not None
        glt = Table(schema['tables'].get(tname))
        # NOTE(review): an empty Table wrapper is taken to mean "table does
        # not exist yet" - confirm against the Table helper.
        if glt == {}:
            glt_spec = ermrest_model.Table.define(
                tname,
                column_defs=[
                    ermrest_model.Column.define(
                        self.NC_NAME,
                        ermrest_model.builtin_types.text,
                        nullok=False,
                        comment=
                        'Name of grouplist, used in foreign keys. This table is maintained by the acl-config '
                        'program and should not be updated by hand.'),
                    ermrest_model.Column.define(
                        self.GC_NAME,
                        ermrest_model.builtin_types['text[]'],
                        nullok=True,
                        comment=
                        'List of groups. This table is maintained by the acl-config program and should not be '
                        'updated by hand.')
                ],
                key_defs=[
                    ermrest_model.Key.define([self.NC_NAME],
                                             constraint_names=[[
                                                 sname, "{t}_{c}_u".format(
                                                     t=tname, c=self.NC_NAME)
                                             ]])
                ],
                comment=
                "Named lists of groups used in ACLs. Maintained by the acl-config program. Do not update this "
                "table manually.",
                annotations={'tag:isrd.isi.edu,2016:generated': None})
            glt = Table(self.create_table(sname, tname, glt_spec))

        else:
            name_col = glt.getColumn(self.NC_NAME)
            if name_col is None:
                raise ValueError(
                    'table specified for group lists ({s}.{t}) lacks a "{n}" column'
                    .format(s=sname, t=tname, n=self.NC_NAME))
            if name_col.get('nullok'):
                raise ValueError(
                    "{n} column in group list table ({s}.{t}) allows nulls".
                    format(n=self.NC_NAME, s=sname, t=tname))

            # The name column must be a key on its own.
            nc_uniq = False
            for key in glt.get('keys'):
                cols = key.get('unique_columns')
                if len(cols) == 1 and cols[0] == self.NC_NAME:
                    nc_uniq = True
                    break
            if not nc_uniq:
                raise ValueError(
                    "{n} column in group list table ({s}.{t}) is not a key".
                    format(n=self.NC_NAME, s=sname, t=tname))

            val_col = glt.getColumn(self.GC_NAME)
            if val_col is None:
                raise ValueError(
                    'table specified for group lists ({s}.{t}) lacks a "{n}" column'
                    .format(s=sname, t=tname, n=self.GC_NAME))
        if glt == {}:
            return None
        else:
            return glt

    def set_node_acl(self, node, spec):
        """Clear ``node.acls`` and apply the ACL set named by ``spec['acl']``, if any."""
        node.acls.clear()
        acl_name = spec.get("acl")
        if acl_name is not None:
            self.add_node_acl(node, acl_name)

    def expand_groups(self):
        """Flatten every configured group list in place.

        Does nothing when the configuration has no ``groups`` section.
        """
        if self.groups is None:
            return
        # Iterate over a snapshot of the names; expand_group() rebinds values.
        for group_name in list(self.groups):
            self.expand_group(group_name)

    def get_group(self, group_name):
        """Return the member list of a named group, or ``[group_name]`` if unknown."""
        group = self.groups.get(group_name) if self.groups is not None else None
        if group is None:
            group = [group_name]
        return group

    def validate_group(self, group):
        """Check the syntax of a literal group identifier.

        '*' is accepted as-is; Globus and webauthn robot identifiers are
        checked by their validators; anything else produces a warning.
        """
        if group == '*':
            return
        elif group.startswith(self.GLOBUS_PREFIX):
            self.validate_globus_group(group)
        elif group.startswith(
                self.ROBOT_PREFIX_FORMAT.format(server=self.server)):
            self.validate_webauthn_robot(group)
        else:
            warnings.warn(
                "Can't determine format of group '{g}'".format(g=group))

    def validate_globus_group(self, group):
        """Require the part after the Globus prefix to parse as a UUID.

        :raises ValueError: if it does not
        """
        guid = group[len(self.GLOBUS_PREFIX):]
        try:
            UUID(guid)
        except ValueError:
            raise ValueError(
                "Group '{g}' appears to be a malformed Globus group".format(
                    g=group))
        if self.verbose:
            print(
                "group '{g}' appears to be a syntactically-correct Globus group"
                .format(g=group))

    def validate_webauthn_robot(self, group):
        """Require a non-empty robot name after the robot prefix.

        :raises ValueError: if the name is empty
        """
        robot_name = group[
            len(self.ROBOT_PREFIX_FORMAT.format(server=self.server)):]
        if not robot_name:
            raise ValueError(
                "Group '{g}' appears to be a malformed webauthn robot identity"
                .format(g=group))
        if self.verbose:
            print(
                "group '{g}' appears to be a syntactically-correct webauthn robot identity"
                .format(g=group))

    def expand_group(self, group_name):
        """Replace a group's members by the de-duplicated, flattened list.

        Members that name another configured group are expanded recursively;
        all other members are validated as literal identifiers.
        """
        groups = []
        for child_name in self.groups.get(group_name):
            child = self.groups.get(child_name)
            if child is None:
                self.validate_group(child_name)
                groups.append(child_name)
            else:
                self.expand_group(child_name)
                groups = groups + self.groups[child_name]
        self.groups[group_name] = list(set(groups))

    def expand_acl_definitions(self):
        """Expand group references in every ACL definition in place.

        Does nothing when the configuration has no ``acl_definitions`` section.
        """
        if self.acl_definitions is None:
            return
        for acl_name in list(self.acl_definitions):
            self.expand_acl_definition(acl_name)

    def expand_acl_definition(self, acl_name):
        """Replace each operation's group name(s) by the expanded member list."""
        spec = self.acl_definitions.get(acl_name)
        for op_type in list(spec):
            groups = []
            raw_groups = spec[op_type]
            if isinstance(raw_groups, list):
                for group_name in raw_groups:
                    groups = groups + self.get_group(group_name)
            else:
                groups = self.get_group(raw_groups)
            spec[op_type] = groups

    def set_table_acls(self, table):
        """Reset and apply ACLs for a table, then for its columns and foreign keys."""
        spec = self.acl_specs["table_acls"].find_best_table_spec(
            table.schema.name, table.name)
        table.acls.clear()
        table.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(table, spec)
            self.set_node_acl_bindings(table, table, spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set table {s}.{t} acls to {a}, bindings to {b}".format(
                s=table.schema.name,
                t=table.name,
                a=str(table.acls),
                b=str(table.acl_bindings)))
        for column in table.column_definitions:
            self.set_column_acls(column, table)
        for fkey in table.foreign_keys:
            self.set_fkey_acls(fkey, table)

    def set_column_acls(self, column, table):
        """Reset and apply ACLs and bindings for one column."""
        spec = self.acl_specs["column_acls"].find_best_column_spec(
            column.table.schema.name, column.table.name, column.name)
        column.acls.clear()
        column.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(column, spec)
            self.set_node_acl_bindings(column, table, spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set column {s}.{t}.{c} acls to {a}, bindings to {b}".format(
                s=column.table.schema.name,
                t=column.table.name,
                c=column.name,
                a=str(column.acls),
                b=str(column.acl_bindings)))

    def set_fkey_acls(self, fkey, table):
        """Reset and apply ACLs and bindings for one foreign key."""
        spec = self.acl_specs["foreign_key_acls"].find_best_foreign_key_spec(
            fkey.table.schema.name, fkey.table.name, fkey.names)
        fkey.acls.clear()
        fkey.acl_bindings.clear()
        if spec is not None:
            self.set_node_acl(fkey, spec)
            # set_node_acl_bindings() requires the invalidate list as well;
            # pass it the same way the table and column variants do.
            self.set_node_acl_bindings(fkey, table, spec.get("acl_bindings"),
                                       spec.get("invalidate_bindings"))
        if self.verbose:
            print("set fkey {f} acls to {a}, bindings to {b}".format(
                f=str(fkey.names),
                a=str(fkey.acls),
                b=str(fkey.acl_bindings)))

    def set_catalog_acls(self, catalog):
        """Apply the catalog ACL spec (if any), then process every schema."""
        spec = self.acl_specs["catalog_acl"]
        if spec is not None:
            catalog.acls.clear()
            self.set_node_acl(catalog, spec)
        if self.verbose:
            print("set catalog acls to {a}".format(a=str(catalog.acls)))
        for schema in self.toplevel_config.schemas.values():
            self.set_schema_acls(schema)

    def set_schema_acls(self, schema):
        """Reset and apply ACLs for a schema and its tables.

        Schemas whose name matches an ignored pattern are skipped entirely.
        """
        for pattern in self.ignored_schema_patterns:
            if pattern.match(schema.name) is not None:
                print("ignoring schema {s}".format(s=schema.name))
                return
        spec = self.acl_specs["schema_acls"].find_best_schema_spec(schema.name)
        schema.acls.clear()
        if spec is not None:
            self.set_node_acl(schema, spec)
        if self.verbose:
            print("set schema {s} acls to {a}".format(s=schema.name,
                                                      a=str(schema.acls)))

        for table in schema.tables.values():
            self.set_table_acls(table)

    def set_acls(self):
        """Apply ACLs starting from the top-level node (model, schema or table).

        :raises ValueError: if the top-level node is of any other type
        """
        if isinstance(self.toplevel_config, ermrest_model.Model):
            self.set_catalog_acls(self.toplevel_config)
        elif isinstance(self.toplevel_config, ermrest_model.Schema):
            self.set_schema_acls(self.toplevel_config)
        elif isinstance(self.toplevel_config, ermrest_model.Table):
            self.set_table_acls(self.toplevel_config)
        else:
            raise ValueError("toplevel config is a {t}".format(
                t=str(type(self.toplevel_config))))

    def apply_acls(self):
        """Apply the edited configuration, passing the saved copy to ``apply``."""
        self.toplevel_config.apply(self.saved_toplevel_config)

    def dumps(self):
        """Dump a serialized (string) representation of the config.
        """
        return json.dumps(self.toplevel_config.prejson(), indent=2)
def main(servername, credentialsfilename, catalog):
    """Set the asset annotation of the Microscopy.Scan "HTTP URL" column.

    :param servername: host name of the ERMrest server
    :param credentialsfilename: path to a JSON file holding the credentials
    :param catalog: catalog identifier passed to ``ErmrestCatalog``

    Exits with status 1 when the catalog model cannot be fetched or the
    updated configuration cannot be applied.
    """
    asset = {
        "tag:isrd.isi.edu,2017:asset": {
            "sha256": "checksum",
            "url_pattern": "/hatrac/Microscopy/{{{slide_id}}}/{{{filename}}}",
            "filename_column": "filename",
            "byte_count_column": "bytes"
        }
    }

    def apply(catalog, goal):
        """
        Apply the goal configuration to live catalog

        Conflicts are retried up to 5 times; any other failure is reported
        and ends the attempt. Returns True on success, False otherwise.
        """
        print('applying...')
        counter = 0
        while True:
            try:
                catalog.applyCatalogConfig(goal)
                return True
            except HTTPError as err:
                et, ev, tb = sys.exc_info()
                print(err)
                print(err.errno)
                if err.errno == CONFLICT:
                    print('Conflict Exception "%s"' % str(ev))
                    counter = counter + 1
                    if counter < 5:
                        print('Retrying...')
                        continue
                # Retries exhausted, or an HTTP error that is not a conflict:
                # give up rather than looping forever on it.
                print('%s' % str(traceback.format_exception(et, ev, tb)))
                return False
            except Exception:
                et, ev, tb = sys.exc_info()
                print(str(et))
                print('Exception "%s"' % str(ev))
                print('%s' % str(traceback.format_exception(et, ev, tb)))
                return False

    with open(credentialsfilename) as credentials_file:
        credentials = json.load(credentials_file)
    catalog = ErmrestCatalog('https', servername, catalog, credentials)

    # Try the snake_case accessor first and fall back to the camelCase one.
    try:
        goal = catalog.get_catalog_model()
    except AttributeError:
        try:
            goal = catalog.getCatalogModel()
        except Exception:
            et, ev, tb = sys.exc_info()
            print('got exception "%s"' % str(ev))
            print('%s' % str(traceback.format_exception(et, ev, tb)))
            sys.exit(1)

    schema_name = 'Microscopy'
    table_name = 'Scan'
    column_name = 'HTTP URL'

    annotations = goal.column(schema_name, table_name, column_name).annotations
    annotations.update(asset)

    if not apply(catalog, goal):
        # Do not claim success when the configuration was never applied.
        print('Failed to update the asset annotation for the column %s' % column_name)
        sys.exit(1)
    print('Successfully updated the asset annotation for the column %s' % column_name)
# replace these with your real group IDs #"curator": "https://auth.globus.org/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", server = DerivaServer('https', servername, credentials) catalog = ErmrestCatalog('https', servername, catalog_number, credentials) grps = AttrDict({ "admin": "https://auth.globus.org/80af39fa-9503-11e8-88d8-0a7d99bc78fe", "writer": "https://auth.globus.org/72bdb36c-9503-11e8-8c03-0e847f194132", "reader": "https://auth.globus.org/5bd8b30e-9503-11e8-ba34-0e5b3fbbcf14" }) model = catalog.getCatalogModel() print(catalog._catalog_id) model.acls.update({ "owner": [grps.admin], "insert": [grps.writer], "update": [], "delete": [], "select": [grps.writer, grps.reader], "enumerate": ["*"], }) #schema = model.schemas['Core'] schema = model.schemas['public'] tab_list = ['ermrest_client']
def main():
    """Command-line entry point: dump catalog definitions to Python files.

    Depending on the options this writes one of:

    * the definition of a single table (``--table``) to ``<table>.py``;
    * a graph of the catalog (``--graph``) in the chosen format;
    * otherwise the catalog definition, one file per schema and one file
      per table, all under the ``--dir`` directory.
    """

    def python_value(s):
        """Parse ``s`` as a Python literal, falling back to the raw string."""
        try:
            val = ast.literal_eval(s)
        except (ValueError, SyntaxError):
            # literal_eval raises SyntaxError (not only ValueError) for text
            # that is not a valid expression, e.g. a name containing a space.
            val = s
        return val

    parser = argparse.ArgumentParser(
        description='Dump definition for catalog {}:{}')
    parser.add_argument('server', help='Catalog server name')
    parser.add_argument('--catalog-id', default=1,
                        help='ID number of desired catalog')
    parser.add_argument('--dir', default="catalog-configs",
                        help='output directory name)')
    parser.add_argument(
        '--table', default=None,
        help='Only dump out the spec for the specified table.  Format is '
             'schema_name:table_name')
    parser.add_argument('--schemas', type=python_value, default=None,
                        help='Only dump out the spec for the specified '
                             'schemas (value or list).')
    parser.add_argument('--skip-schemas', type=python_value, default=None,
                        help='List of schema so skip over')
    parser.add_argument('--graph', action='store_true',
                        help='Dump graph of catalog')
    parser.add_argument('--graph-format',
                        choices=['pdf', 'dot', 'png', 'svg'], default='pdf',
                        help='Format to use for graph dump')

    args = parser.parse_args()

    dumpdir = args.dir
    server = args.server
    catalog_id = args.catalog_id
    table = args.table

    # A single schema name is normalised to a one-element list.
    schemas = args.schemas
    if isinstance(schemas, str):
        schemas = [schemas]
    skip_schemas = args.skip_schemas
    if isinstance(skip_schemas, str):
        skip_schemas = [skip_schemas]

    try:
        os.makedirs(dumpdir, exist_ok=True)
    except OSError:
        print("Creation of the directory %s failed" % dumpdir)
        sys.exit(1)

    credential = get_credential(server)
    catalog = ErmrestCatalog('https', server, catalog_id,
                             credentials=credential)
    model_root = catalog.getCatalogModel()

    print('Catalog has {} schema and {} tables'.format(
        len(model_root.schemas),
        sum([len(v.tables) for k, v in model_root.schemas.items()])))

    for k, s in model_root.schemas.items():
        print('    {} has {} tables'.format(k, len(s.tables)))

    if table is not None:
        if ':' not in table:
            # A bare table name is accepted only when exactly one schema was
            # given through --schemas (the option is "schemas"; there is no
            # "schema" attribute on args).
            if isinstance(schemas, (list, tuple)) and len(schemas) == 1:
                schema_name = schemas[0]
                table_name = table
            else:
                print('Table name must be in form of schema:table')
                sys.exit(1)
        else:
            [schema_name, table_name] = table.split(":")
        print("Dumping out table def....")
        stringer = DerivaCatalogToString(catalog)
        table_string = stringer.table_to_str(schema_name, table_name)
        with open(table_name + '.py', 'w') as f:
            print(table_string, file=f)
    elif args.graph:
        graph = DerivaCatalogToGraph(catalog)
        graphfile = '{}_{}'.format(server, catalog_id)
        graph.catalog_to_graph(skip_schemas=skip_schemas,
                               schemas=schemas,
                               skip_terms=True,
                               skip_assocation_tables=True)
        # argparse stores --graph-format as "graph_format".
        graph.save(filename=graphfile, format=args.graph_format)
    else:
        print("Dumping catalog def....")
        stringer = DerivaCatalogToString(catalog)
        catalog_string = stringer.catalog_to_str()

        with open('{}/{}_{}.py'.format(dumpdir, server, catalog_id), 'w') as f:
            print(catalog_string, file=f)

        for schema_name in model_root.schemas:
            if skip_schemas is not None and schema_name in skip_schemas:
                continue
            print("Dumping schema def for {}....".format(schema_name))
            schema_string = stringer.schema_to_str(schema_name)

            with open('{}/{}.schema.py'.format(dumpdir, schema_name), 'w') as f:
                print(schema_string, file=f)

        for schema_name, schema in model_root.schemas.items():
            for i in schema.tables:
                print('Dumping {}:{}'.format(schema_name, i))
                table_string = stringer.table_to_str(schema_name, i)
                filename = '{}/{}/{}.py'.format(dumpdir, schema_name, i)
                os.makedirs(os.path.dirname(filename), exist_ok=True)
                with open(filename, 'w') as f:
                    print(table_string, file=f)
def create_table(server):
    """Create the table described by ``table_def`` in the 'Common' schema.

    :param server: host name of the ERMrest server; catalog 1 is used

    ``table_def`` is read from the enclosing module. Nothing is returned.
    """
    login = get_credential(server)
    catalog = ErmrestCatalog('https', server, 1, credentials=login)
    common_schema = catalog.getCatalogModel().schemas['Common']
    common_schema.create_table(catalog, table_def)
# Connect to the catalog. hostname, catalog_number, schema_name, pdb,
# flag_AnnotateTable_RowName and row_name_list are defined outside this
# fragment.
credential = get_credential(hostname)
catalog_ermrest = ErmrestCatalog('https', hostname, catalog_number, credentials=credential)
# The raw ERMrest catalog is also wrapped in the project's DerivaCatalog type.
catalog = model_elements.DerivaCatalog(catalog_ermrest)
pb = catalog.getPathBuilder()

# Create the target schema on first use, otherwise reuse the existing one.
if schema_name not in catalog.model.schemas.keys():
    schema = catalog.create_schema(schema_name, comment=pdb['title'])
else:
    schema = catalog.model.schemas[schema_name]

model = catalog_ermrest.getCatalogModel()

if flag_AnnotateTable_RowName:
    # row_name_list = [('chem_comp', 'id')]
    # NOTE(review): the model is fetched a second time here rather than
    # reusing `model` from above - confirm whether that is intentional.
    model_root = catalog_ermrest.getCatalogModel()
    # Each entry is (table name, column name); the row name of the table is
    # set to a markdown pattern that renders that column's value.
    for p_table, p_column in row_name_list:
        print(p_table)
        tab = model_root.schemas[schema_name].tables[p_table]
        tab.annotations.update({
            chaise_tags.table_display: {
                'row_name': {
                    "row_markdown_pattern": "{{{" + p_column + "}}}"
                }
            }
        })
class AttrConfig:
    """Set annotations on an ERMrest catalog model from a JSON config file.

    The config file supplies ``known_attributes`` (with ``managed``,
    ``ignored`` and ``ignore_all_unmanaged`` entries), an optional
    ``ignored_schema_patterns`` list of regular expressions, and one spec
    list per entry of ``AttrSpecList.SPEC_TYPES``.

    Two copies of the catalog model are fetched: ``toplevel_config`` is
    modified by the ``set_*`` methods, and ``saved_toplevel_config`` is left
    untouched and handed to ``apply`` in :meth:`apply_annotations`.
    """

    def __init__(self, server, catalog_id, config_file, credentials, verbose=False, schema_name=None, table_name=None):
        """Load the config file and fetch the catalog model twice.

        :param server: host name of the ERMrest server.
        :param catalog_id: identifier of the catalog.
        :param config_file: path of the JSON configuration file.
        :param credentials: credentials passed to ``ErmrestCatalog``.
        :param verbose: when true, print each annotation change.
        :param schema_name: passed to ``ConfigUtil.find_toplevel_node``.
        :param table_name: passed to ``ConfigUtil.find_toplevel_node``.
        """
        # Use a context manager so the config file handle is always closed.
        with open(config_file) as config_fp:
            self.config = json.load(config_fp)

        ip = self.config.get("ignored_schema_patterns")
        self.ignored_schema_patterns = [] if ip is None else [re.compile(p) for p in ip]

        self.known_attrs = self.config.get(u'known_attributes')
        self.managed_annotations = self.known_attrs.get(u'managed')
        self.known_annotations = self.managed_annotations
        self.all_annotations = self.known_annotations
        self.ignored_annotations = self.known_attrs.get(u'ignored')
        if self.ignored_annotations is not None:
            # `+` builds a new list, so the managed list is not modified.
            self.all_annotations = self.all_annotations + self.ignored_annotations
        self.ignore_unmanaged = self.known_attrs.get(u'ignore_all_unmanaged')

        self.annotation_specs = dict()
        for key in AttrSpecList.SPEC_TYPES:
            self.annotation_specs[key] = self.make_speclist(key)

        self.server = server
        self.catalog_id = catalog_id
        self.verbose = verbose

        # Fetch the model through two separate catalog objects so that the
        # saved copy is independent of the copy that gets modified.
        old_catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
        self.saved_toplevel_config = ConfigUtil.find_toplevel_node(old_catalog.getCatalogModel(), schema_name, table_name)
        self.catalog = ErmrestCatalog('https', self.server, self.catalog_id, credentials)
        self.toplevel_config = ConfigUtil.find_toplevel_node(self.catalog.getCatalogModel(), schema_name, table_name)

    def make_speclist(self, name):
        """Return an ``AttrSpecList`` for the config entry called ``name``.

        A missing entry is replaced by a list holding one empty dict.
        """
        # u'{}'.format(...) produces the text form of `name` on both Python 2
        # and Python 3, without relying on the Python-2-only `unicode` builtin.
        d = self.config.get(u'{}'.format(name))
        if d is None:
            d = [dict()]
        return AttrSpecList(self.known_attrs, d)

    def find_best_schema_specs(self, schema_name):
        """Return ``{annotation key: best schema spec}`` for every managed key."""
        speclist = self.annotation_specs["schema_annotations"]
        return {key: speclist.find_best_schema_spec(schema_name, key=key)
                for key in self.managed_annotations}

    def find_best_table_specs(self, schema_name, table_name):
        """Return ``{annotation key: best table spec}`` for every managed key."""
        speclist = self.annotation_specs["table_annotations"]
        return {key: speclist.find_best_table_spec(schema_name, table_name, key=key)
                for key in self.managed_annotations}

    def find_best_fkey_specs(self, fkey):
        """Return ``{annotation key: best foreign-key spec}`` for every managed key."""
        speclist = self.annotation_specs["foreign_key_annotations"]
        return {key: speclist.find_best_foreign_key_spec(fkey.table.schema.name, fkey.table.name, fkey.names, key=key)
                for key in self.managed_annotations}

    def find_best_column_specs(self, schema_name, table_name, column_name):
        """Return ``{annotation key: best column spec}`` for every managed key."""
        speclist = self.annotation_specs["column_annotations"]
        return {key: speclist.find_best_column_spec(schema_name, table_name, column_name, key=key)
                for key in self.managed_annotations}

    def node_name(self, node):
        """Return a human-readable label for a model node, used in verbose output."""
        if isinstance(node, ermrest_model.Schema):
            return "schema {s}".format(s=str(node.name))
        if isinstance(node, ermrest_model.Table):
            return "table {s}.{t}".format(s=str(node.schema.name), t=str(node.name))
        if isinstance(node, ermrest_model.Column):
            return "column {s}.{t}.{c}".format(s=str(node.table.schema.name), t=str(node.table.name), c=str(node.name))
        if isinstance(node, ermrest_model.ForeignKey):
            return "foreign key {n}".format(n=str(node.names))
        return str("unknown node type {t}".format(t=type(node)))

    def set_node_annotations(self, node, specs, saved_node):
        """Update ``node.annotations`` in place according to ``specs``.

        :param node: object with a dict-like ``annotations`` attribute.
        :param specs: mapping of annotation key to a spec dict, or ``None``.
            With ``None`` all annotations are cleared, unless
            ``ignore_unmanaged`` is set, in which case nothing changes.
        :param saved_node: accepted for symmetry with the callers; not used.
        :raises ValueError: if, after the update, the node carries a key that
            is neither managed nor ignored and ``ignore_unmanaged`` is false.
        """
        if specs is None:
            if not self.ignore_unmanaged:
                if self.verbose:
                    print("{n}: clearing annotations".format(n=self.node_name(node)))
                node.annotations.clear()
            return

        for k in self.managed_annotations:
            s = specs.get(k)
            if s is not None and u'value' in s:
                if self.verbose:
                    print("{n}: setting {k} to {v}".format(n=self.node_name(node), k=k, v=s[u'value']))
                node.annotations[k] = s[u'value']
            elif k in node.annotations:
                # A managed key with no value in the spec is removed.
                if self.verbose:
                    print("{n}: clearing {k}".format(n=self.node_name(node), k=k))
                node.annotations.pop(k)

        if not self.ignore_unmanaged:
            for k in node.annotations:
                if k not in self.all_annotations:
                    raise ValueError("annotation key {k} is neither managed nor ignored".format(k=k))

    def set_table_annotations(self, table, saved_table):
        """Set annotations on ``table`` and on each of its columns and foreign keys."""
        self.set_node_annotations(table, self.find_best_table_specs(table.schema.name, table.name), saved_table)
        for column in table.column_definitions:
            self.set_column_annotations(column, self.find_named_column(saved_table, column.name))
        for fkey in table.foreign_keys:
            self.set_fkey_annotations(fkey, self.find_corresponding_fkey(saved_table, fkey))

    def find_corresponding_fkey(self, table, base_fkey):
        """Return the foreign key of ``table`` that matches ``base_fkey``, or ``None``.

        Only the first entry of ``base_fkey.names`` is compared, and it must
        be a two-element sequence. A foreign key matches when any of its own
        two-element names equals that entry element by element.
        """
        if table is None:
            return None
        if base_fkey.names is None or len(base_fkey.names) == 0:
            return None
        names = base_fkey.names[0]
        if len(names) != 2:
            return None
        for fkey in table.foreign_keys:
            if fkey is not None and fkey.names is not None and len(fkey.names) > 0:
                for n in fkey.names:
                    if len(n) == 2 and n[0] == names[0] and n[1] == names[1]:
                        return fkey
        return None

    def find_named_column(self, table, column_name):
        """Return the column of ``table`` named ``column_name``, or ``None``."""
        if table is None:
            return None
        for column in table.column_definitions:
            if column.name == column_name:
                return column
        return None

    def find_named_schema(self, catalog, schema_name):
        """Return ``catalog.schemas.get(schema_name)``, or ``None`` without a catalog or schemas."""
        if catalog is None or catalog.schemas is None:
            return None
        return catalog.schemas.get(schema_name)

    def find_named_table(self, schema, table_name):
        """Return ``schema.tables.get(table_name)``, or ``None`` without a schema or tables."""
        if schema is None:
            return None
        if schema.tables is None:
            return None
        return schema.tables.get(table_name)

    def set_fkey_annotations(self, fkey, saved_fkey):
        """Set annotations on one foreign key."""
        self.set_node_annotations(fkey, self.find_best_fkey_specs(fkey), saved_fkey)

    def set_column_annotations(self, column, saved_column):
        """Set annotations on one column."""
        self.set_node_annotations(column, self.find_best_column_specs(column.table.schema.name, column.table.name, column.name), saved_column)

    def set_schema_annotations(self, schema, saved_schema):
        """Set annotations on ``schema`` and its tables.

        A schema whose name matches any ignored pattern is skipped entirely.
        """
        for pat in self.ignored_schema_patterns:
            if pat.match(schema.name) is not None:
                print("ignoring schema {s}".format(s=schema.name))
                return
        specs = self.find_best_schema_specs(schema.name)
        self.set_node_annotations(schema, specs, saved_schema)
        for table in schema.tables.values():
            self.set_table_annotations(table, self.find_named_table(saved_schema, table.name))

    def set_catalog_annotations(self):
        """Set annotations on the catalog node and on every schema beneath it."""
        speclist = self.annotation_specs["catalog_annotations"]
        specs = {key: speclist.find_catalog_spec(key) for key in self.managed_annotations}
        self.set_node_annotations(self.toplevel_config, specs, self.saved_toplevel_config)
        for schema in self.toplevel_config.schemas.values():
            self.set_schema_annotations(schema, self.find_named_schema(self.saved_toplevel_config, schema.name))

    def set_attributes(self):
        """Dispatch on the type of ``toplevel_config`` and set annotations from there down.

        :raises ValueError: if the top-level node is not a Model, Schema or Table.
        """
        if isinstance(self.toplevel_config, ermrest_model.Model):
            self.set_catalog_annotations()
        elif isinstance(self.toplevel_config, ermrest_model.Schema):
            self.set_schema_annotations(self.toplevel_config, self.saved_toplevel_config)
        elif isinstance(self.toplevel_config, ermrest_model.Table):
            self.set_table_annotations(self.toplevel_config, self.saved_toplevel_config)
        else:
            raise ValueError("toplevel config is a {t}".format(t=str(type(self.toplevel_config))))

    def apply_annotations(self):
        """Call ``apply`` on the modified node, passing the saved copy."""
        self.toplevel_config.apply(self.saved_toplevel_config)
class DerivaDumpCatalogCLI (BaseCLI):
    """Command-line tool that dumps a catalog's definition to Python files or a graph.

    Depending on the arguments it writes a single table definition, a graph
    of the catalog, or the catalog definition plus one file per schema and
    per table.
    """

    def __init__(self, description, epilog):
        """Register the dump-specific command-line arguments.

        :param description: passed through to ``BaseCLI``.
        :param epilog: passed through to ``BaseCLI``.
        """
        super(DerivaDumpCatalogCLI, self).__init__(description, epilog, VERSION, hostname_required=True)

        # Run state; filled in by main() once the arguments are parsed.
        self.dumpdir = ''
        self.host = None
        self.catalog_id = 1
        self.graph_format = None
        self.catalog = None
        self.model = None
        self.schemas = []

        # parent arg parser
        parser = self.parser
        parser.add_argument('--catalog', '--catalog-id', metavar='CATALOG-NUMBER', default=1,
                            help='ID number of desired catalog')
        parser.add_argument('--dir', default="catalog-configs", help='output directory name')
        # --table and --graph select alternative modes and cannot be combined.
        group = parser.add_mutually_exclusive_group()
        group.add_argument('--table', default=None,
                           help='Only dump out the spec for the specified table. Format is '
                                'schema_name:table_name')
        parser.add_argument('--schemas', nargs='*', default=[],
                            help='Only dump out the spec for the specified schemas.')
        parser.add_argument('--skip-schemas', nargs='*', default=[], help='List of schema so skip over')
        group.add_argument('--graph', action='store_true', help='Dump graph of catalog')
        parser.add_argument('--graph-format', choices=['pdf', 'dot', 'png', 'svg'], default='pdf',
                            help='Format to use for graph dump')

    @staticmethod
    def _get_credential(host_name, token=None):
        """Return a cookie credential built from ``token``, or the stored credential for the host."""
        if token:
            return {"cookie": "webauthn={t}".format(t=token)}
        else:
            return get_credential(host_name)

    def _dump_table(self, schema_name, table_name, stringer=None, dumpdir='.'):
        """Write the definition of one table to ``<dumpdir>/<table_name>.py`` as UTF-8.

        :param schema_name: schema that holds the table.
        :param table_name: table to dump; also the output file stem.
        :param stringer: ``DerivaCatalogToString`` to reuse; a new one is
            created from ``self.catalog`` when omitted.
        :param dumpdir: output directory, created if it does not exist.
        """
        logger.info("Dumping out table def: {}:{}".format(schema_name,table_name))
        if not stringer:
            stringer = DerivaCatalogToString(self.catalog)

        table_string = stringer.table_to_str(schema_name, table_name)
        filename= dumpdir + '/' + table_name + '.py'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'wb') as f:
            f.write(table_string.encode("utf-8"))

    def _dump_catalog(self):
        """Write the catalog definition, then one file per selected schema and table."""
        stringer = DerivaCatalogToString(self.catalog)
        catalog_string = stringer.catalog_to_str()

        with open('{}/{}_{}.py'.format(self.dumpdir, self.host, self.catalog_id), 'wb') as f:
            f.write(catalog_string.encode("utf-8"))

        for schema_name in self.schemas:
            logger.info("Dumping schema def for {}....".format(schema_name))
            schema_string = stringer.schema_to_str(schema_name)
            with open('{}/{}.schema.py'.format(self.dumpdir, schema_name), 'wb') as f:
                f.write(schema_string.encode("utf-8"))

        # Tables go into a sub-directory named after their schema.
        for schema_name, schema in self.model.schemas.items():
            if schema_name in self.schemas:
                for table_name in schema.tables:
                    self._dump_table(schema_name, table_name,
                                     stringer=stringer,
                                     dumpdir='{}/{}'.format(self.dumpdir, schema_name))

    def _graph_catalog(self):
        """Build a graph of the selected schemas and save it as ``<host>_<catalog_id>``.

        The schemas ``_acl_admin``, ``public`` and ``WWW`` are always left out.
        """
        graph = DerivaCatalogToGraph(self.catalog)
        graphfile = '{}_{}'.format(self.host, self.catalog_id)
        graph.catalog_to_graph(schemas=[s for s in self.schemas if s not in ['_acl_admin', 'public', 'WWW']],
                               skip_terms=True,
                               skip_association_tables=True)
        graph.save(filename=graphfile, format=self.graph_format)

    def main(self):
        """Parse the command line and run the requested dump.

        :return: ``1`` when the host is missing, the output directory cannot
            be created, or the dump fails; ``None`` on success.
        """
        args = self.parse_cli()

        self.dumpdir = args.dir
        self.host = args.host
        self.catalog_id = args.catalog
        self.graph_format = args.graph_format

        if self.host is None:
            eprint('Host name must be provided')
            return 1

        self.catalog = ErmrestCatalog('https', self.host, self.catalog_id,
                                      credentials=self._get_credential(self.host))
        self.model = self.catalog.getCatalogModel()

        # Start from the requested schemas (or all of them), then drop the skipped ones.
        self.schemas = [s for s in (args.schemas if args.schemas else self.model.schemas)
                        if s not in args.skip_schemas
                        ]

        try:
            os.makedirs(self.dumpdir, exist_ok=True)
        except OSError as e:
            sys.stderr.write(str(e))
            return 1

        logger.info('Catalog has {} schema and {} tables'.format(
            len(self.model.schemas),
            sum(len(s.tables) for s in self.model.schemas.values())))
        logger.info('\n'.join(['    {} has {} tables'.format(k, len(s.tables))
                               for k, s in self.model.schemas.items()]))

        try:
            if args.table:
                if ':' not in args.table:
                    raise DerivaDumpCatalogException('Table name must be in form of schema:table')
                [schema_name, table_name] = args.table.split(":")
                self._dump_table(schema_name, table_name)
            elif args.graph:
                self._graph_catalog()
            else:
                self._dump_catalog()
        except DerivaDumpCatalogException as e:
            print(e)
            # Report failure like the other error paths do.
            return 1
        except HTTPError as e:
            if e.response.status_code == requests.codes.unauthorized:
                # Use the parsed host; this block never reads a `server`
                # attribute from the parsed arguments.
                msg = 'Authentication required for {}'.format(self.host)
            elif e.response.status_code == requests.codes.forbidden:
                msg = 'Permission denied'
            else:
                msg = e
            logging.debug(format_exception(e))
            eprint(msg)
            return 1
        except RuntimeError as e:
            sys.stderr.write(str(e))
            return 1
        except:
            # Top-level boundary: print the traceback and report failure.
            traceback.print_exc()
            return 1
        finally:
            sys.stderr.write("\n\n")
        return
# Input files, in sorted name order.
filename_list = os.listdir(files_dir)
filename_list.sort()

# Two handles on the same catalog: one obtained through DerivaServer (used
# for the path builder) and one ErmrestCatalog (used to fetch the model).
credential = get_credential(hostname)
server = DerivaServer('https', hostname, credential)
catalog = server.connect_ermrest(catalog_number)
pb = catalog.getPathBuilder()

# NOTE(review): this dict is initialised here, but the loop below fills
# `term_data_map`, which must already exist -- confirm both names are intended.
map_term_value = {}

catalog_ermrest = ErmrestCatalog(
    'https', hostname, catalog_number, credentials=credential
)
model = catalog_ermrest.getCatalogModel()

# map to ID of Vocabulary table
for vocab_dict in vocab_list:
    for k, v in vocab_dict.items():
        # The vocabulary table name is built from the first two elements of
        # the key, keeping only its last 50 characters.
        vocab_table_name = '{}_{}_term'.format(k[0], k[1])[-50:]
        vocab_table = pb.schemas[vocab_schema_name].tables[vocab_table_name]
        entities = vocab_table.path.entities()
        for entity in entities:
            term_key = (k[0], k[1], entity['Name'])
            term_data_map[term_key] = entity['ID']

# schema = model.schemas[schema_name]
# for tab_name in schema.tables.keys():
#     table = model.table(schema_name, tab_name)
#     for fkey_def in table.foreign_keys: