def add_file_to_cohort(file, description, cohort):
    """
    Upload a file into a data collection and add that file to the set of files
    associated with a cohort analysis.

    :param file: local path to the file that should be uploaded and associated with the cohort
    :param description: text that is used to describe the file that is being uploaded
    :param cohort: RID of the analysis cohort with which the file should be associated
    :return: None
    """
    credential = get_credential(synapseserver)
    store = HatracStore('https', synapseserver, credentials=credential)
    catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse
    collection = synapse.tables['Collection']

    # Insert a placeholder row first so we have a RID to embed in the Hatrac path.
    files = collection.insert([{'Description': description, 'URL': 'dummy2'}])
    newfileRID = files[0]['RID']
    print('inserted file into collection {}'.format(newfileRID))

    path = '/hatrac/Data/Data_{0}_{1}'.format(newfileRID, os.path.basename(file))
    loc = store.put_obj(path, file)

    # Update the placeholder row with the object location and file metadata.
    files[0]['URL'] = loc
    files[0]['Orig. Basename'] = os.path.basename(file)
    r = store.head(path)
    files[0]['MD5'] = r.headers['content-md5']
    files[0]['#Bytes'] = r.headers['Content-Length']
    files = collection.update(files)

    # Now link into the cohort.
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    collection_table.insert([{'Cohort Analysis': cohort, 'Collection': newfileRID}])
    return
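# Usage sketch (illustrative only): the file name, description, and cohort RID
# below are placeholders, and the module-level `synapseserver` hostname must
# already be set.
if __name__ == '__main__':
    add_file_to_cohort('pair-stats.csv', 'Synapse pair statistics', '1-ABCD')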
def main(): cli = BaseCLI("annotation rollback tool", None, version=__version__, hostname_required=True) cli.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1") cli.parser.add_argument("--snapshot", metavar="<snapshot ID", help="Catalog snapshot ID. Example: 2QG-VWP6-0YG0") args = cli.parse_cli() credential = get_credential(args.host, args.credential_file) rollback_annotation(args.host, args.catalog, snaptime=args.snapshot, credential=credential)
def get_synapses(study):
    """
    Get the synapse data associated with a study.

    Retrieve the actual data from the object store, getting both the before and
    after data if available. The CSV version of the data is read in and returned
    as pandas DataFrames.

    :param study: a dictionary that has URLs for the two images, before and after
    :return: a dictionary with two DataFrames that contain the synapses
    """
    credential = get_credential(synapseserver)
    objectstore = HatracStore('https', synapseserver, credentials=credential)

    # Get a path for a temporary file to store HATRAC results.
    path = os.path.join(tempfile.mkdtemp(), 'image')
    try:
        # Get the before image from Hatrac; be careful in case it is missing.
        if study['BeforeURL']:
            objectstore.get_obj(study['BeforeURL'], destfilename=path)
            img1 = pd.read_csv(path)
            # Skip the first data row (the file's second line), which has metadata in it....
            img1.drop(img1.index[0], inplace=True)
        else:
            img1 = None

        # Get the after image from Hatrac; be careful in case it is missing.
        if study['AfterURL']:
            objectstore.get_obj(study['AfterURL'], destfilename=path)
            img2 = pd.read_csv(path)
            img2.drop(img2.index[0], inplace=True)
        else:
            img2 = None
    finally:
        shutil.rmtree(os.path.dirname(path))

    return {'Before': img1, 'After': img2,
            'Type': study['Type'], 'Study': study['Study'], 'Subject': study['Subject']}
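# Usage sketch (illustrative only): the URLs and RIDs are placeholders; the
# study dict needs at least the keys the function reads.
if __name__ == '__main__':
    pair = get_synapses({'BeforeURL': '/hatrac/Data/Zf_1AB0.csv',
                         'AfterURL': '/hatrac/Data/Zf_1AB2.csv',
                         'Type': 'learner', 'Study': '1-ABC0', 'Subject': '1-AB12'})
    print(pair['Before'].head())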
def get_studies(studyid):
    credential = get_credential(synapseserver)
    if '@' in studyid:
        # Same pattern as fetch_studies: split off the snapshot ID and pin to it.
        studyid, snaptime = studyid.split('@')
        ermrest_catalog = ErmrestSnapshot('https', synapseserver, 1, snaptime, credentials=credential)
    else:
        ermrest_catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential).latest_snapshot()

    githash = git_version()
    ermrest_snapshot = ermrest_catalog.snaptime

    # Get the current list of studies from the server.
    study_entities = get_synapse_studies(ermrest_catalog, studyid)
    print('Identified %d studies' % len(study_entities))

    protocol_types = {
        'PrcDsy20160101A': 'aversion',
        'PrcDsy20170613A': 'conditioned-control',
        'PrcDsy20170615A': 'unconditioned-control',
        'PrcDsy20170613B': 'fullcycle-control',
        'PrcDsy20171030A': 'groundtruth-control',
        'PrcDsy20171030B': 'interval-groundtruth-control'
    }

    # Compute the alignment for each study, and fill in some useful values.
    for i in study_entities:
        i['Paired'] = False
        if protocol_types[i['Protocol']] == 'aversion':
            i['Type'] = 'learner' if i['Learner'] is True else 'nonlearner'
        else:
            i['Type'] = protocol_types[i['Protocol']]
        try:
            i['Aligned'] = False
            i['Provenence'] = {'GITHash': githash, 'CatlogVersion': ermrest_snapshot}
            i['StudyID'] = studyid
            i['Alignment'] = ImageGrossAlignment.from_image_id(ermrest_catalog, i['BeforeImageID'])
            p = pd.DataFrame([i[pt] for pt in ['AlignP0', 'AlignP1', 'AlignP2']])
            p = p.multiply(pd.DataFrame([{'z': 0.4, 'y': 0.26, 'x': 0.26}] * 3))
            i['StudyAlignmentPts'] = pd.DataFrame(
                transform_points(i['Alignment'].M_canonical, p.loc[:, ['x', 'y', 'z']]),
                columns=['x', 'y', 'z'])
            # i['StudyAlignmentPts'] = pd.DataFrame(transform_points(i['Alignment'].M, p.loc[:, ['x', 'y', 'z']]),
            #                                       columns=['x', 'y', 'z'])
            i['Aligned'] = True
            i['AlignmentPts'] = dict()
        except ValueError:
            # Alignments missing....
            print('Alignment missing for study: {0}'.format(i['Study']))
            continue
        except NotImplementedError:
            print('Alignment code failed for study: {0}'.format(i['Study']))
            continue

    return {'StudyID': studyid,
            'Studies': list(study_entities),
            'Provenence': {'GITHash': githash, 'CatlogVersion': ermrest_snapshot}}
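# Usage sketch (illustrative only): '1-XY2Z' is a placeholder study-set RID;
# a '@<snapshot>' suffix pins the query to that catalog version.
if __name__ == '__main__':
    result = get_studies('1-XY2Z')
    print('Aligned studies:', sum(1 for s in result['Studies'] if s['Aligned']))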
def main():
    cli = AclCLI()
    args = cli.parse_cli()
    table_name = cli.get_table_arg(args)
    schema_names = cli.get_schema_arg_list(args)
    credentials = get_credential(args.host, args.credential_file)
    save_groups = not (args.dryrun or args.omit_groups)
    for schema in schema_names:
        acl_config = AclConfig(args.host, args.catalog, args.config_file, credentials,
                               schema_name=schema, table_name=table_name,
                               verbose=args.verbose or args.debug)
        try:
            if save_groups:
                acl_config.save_groups()
                save_groups = False
            if not args.groups_only:
                acl_config.set_acls()
            if not args.dryrun:
                acl_config.apply_acls()
        except HTTPError as e:
            print(format_exception(e))
            raise
        if args.dryrun:
            print(acl_config.dumps())
def setUp(self):
    self.server = 'dev.isrd.isi.edu'
    self.credentials = get_credential(self.server)
    self.catalog_id = None
    self.schema_name = 'TestSchema'
    self.table_name = 'TestTable'
    self.table_size = 100
    self.column_count = 20
    self.test_dir = tempfile.mkdtemp()

    (row, self.headers) = generate_test_csv(self.column_count)
    self.tablefile = '{}/{}.csv'.format(self.test_dir, self.table_name)
    with open(self.tablefile, 'w', newline='') as f:
        tablewriter = csv.writer(f)
        for i, j in zip(range(self.table_size + 1), row):
            tablewriter.writerow(j)

    self.configfile = os.path.dirname(os.path.realpath(__file__)) + '/config.py'
    self.catalog = TempErmrestCatalog('https', self.server, credentials=self.credentials)

    model = self.catalog.getCatalogModel()
    model.create_schema(self.catalog, em.Schema.define(self.schema_name))

    self.table = DerivaCSV(self.tablefile, self.schema_name, column_map=True, key_columns='id')
    # self._create_test_table()
    self.table.create_validate_upload_csv(self.catalog, create=True, upload=True)
    logger.debug('Setup done....')
def synapses_to_bag(study_list, dest, protocol_types, bag_metadata=None, publish=False):
    """
    Export all of the synapse data for every study in the study list. Also output a
    CSV file that contains an index of all of the data. The data index is:
    StudyID, SubjectID, Study Type, and the file names for the before and after synapses.
    """
    bag_metadata = bag_metadata if bag_metadata else {}
    credential = get_credential("synapse.isrd.isi.edu")
    objectstore = HatracStore('https', 'synapse.isrd.isi.edu', credentials=credential)

    current_dir = os.getcwd()
    try:
        os.chdir(dest)
        # Create an output directory for synapse files.
        os.makedirs('synapse-studies', mode=0o777, exist_ok=True)
        os.chdir('synapse-studies')
        dumpdir = os.getcwd()

        for study in study_list:
            copy_synapse_files(objectstore, study)

        # Now write out the CSV file with the list of studies...
        with open('studies.csv', 'w', newline='') as csvfile:
            synapsewriter = csv.writer(csvfile)
            # Write out the header....
            synapsewriter.writerow(['Study', 'Subject', 'Type', 'Learner', 'Before', 'After'])
            for study in study_list:
                study_type = protocol_types[study['Protocol']]
                url1 = study['BeforeURL']
                url2 = study['AfterURL']
                filename1 = filename2 = ''
                if url1:
                    filename1 = os.path.basename(url1.split(':')[0])
                if url2:
                    filename2 = os.path.basename(url2.split(':')[0])
                synapsewriter.writerow([study['Study'], study['Subject'], study_type,
                                        study['Learner'], filename1, filename2])

        bdb.make_bag(dumpdir, metadata=bag_metadata)
        archivefile = bdb.archive_bag(dumpdir, 'zip')
        if publish:
            bagstore = HatracStore('https', 'synapse-dev.isrd.isi.edu', credentials=credential)
            hatrac_path = '/hatrac/Data/synapse-{0}'.format(bag_metadata['ERMRest-Snapshot'])
            return bagstore.put_obj(hatrac_path, archivefile)
    finally:
        os.chdir(current_dir)
    return archivefile
def main(subcommand, *args):
    bootstrap()
    if subcommand == 'login':
        if len(args) > 0 and args[0] in HOST_TO_GCS_SCOPES.keys():
            host = args[0]
            scope = HOST_TO_GCS_SCOPES[host]
            gnl = GlobusNativeLogin()
            tokens = gnl.login(no_browser=True, no_local_server=True,
                               requested_scopes=(scope, CFDE_DERIVA_SCOPE))
            access_token = gnl.find_access_token_for_scope(scope, tokens)
            print('Logged into host "%s" with scope: %s' % (host, scope))
        else:
            raise ValueError("Expected hostname, one of the following: %s" % list(HOST_TO_GCS_SCOPES.keys()))
    elif subcommand == 'logout':
        if len(args) > 0 and args[0] in HOST_TO_GCS_SCOPES.keys():
            host = args[0]
            scope = HOST_TO_GCS_SCOPES[host]
            gnl = GlobusNativeLogin()
            gnl.logout(requested_scopes=(scope, CFDE_DERIVA_SCOPE))
            print('Logged out of host "%s" with scope: %s' % (host, scope))
        else:
            raise ValueError("Expected hostname, one of the following: %s" % list(HOST_TO_GCS_SCOPES.keys()))
    elif subcommand == 'headers':
        if len(args) > 0 and args[0] in HOST_TO_GCS_SCOPES.keys() and args[0] in HOST_TO_GCS_ENDPOINTS.keys():
            host = args[0]
            url = HOST_TO_GCS_ENDPOINTS[host]
            headers = get_archive_headers_map(host)
            if headers:
                print('Header map for "%s" (%s):\n%s' % (host, url, headers))
            else:
                print('Login required for host: "%s"' % host)
        else:
            raise ValueError("Expected hostname, one of the following: %s" % list(HOST_TO_GCS_SCOPES.keys()))
    elif subcommand == 'credential':
        if len(args) > 0 and args[0] in HOST_TO_GCS_SCOPES.keys() and args[0] in HOST_TO_GCS_ENDPOINTS.keys():
            host = args[0]
            url = HOST_TO_GCS_ENDPOINTS[host]
            credential = get_credential(host)
            if credential:
                print('Credential for "%s" (%s):\n%s' % (host, url, credential))
            else:
                print('Login required for host: "%s"' % host)
        else:
            raise ValueError("Expected hostname, one of the following: %s" % list(HOST_TO_GCS_SCOPES.keys()))
    else:
        raise ValueError('unknown sub-command "%s"' % subcommand)
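# Example invocations (illustrative only; valid hostnames are whatever keys
# HOST_TO_GCS_SCOPES defines):
#   main('login', 'app.nih-cfde.org')
#   main('headers', 'app.nih-cfde.org')
#   main('logout', 'app.nih-cfde.org')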
def main(): cli = ConfigBaseCLI("annotation config tool", None, version=MY_VERSION) args = cli.parse_cli() table_name = cli.get_table_arg(args) schema_names = cli.get_schema_arg_list(args) credentials = get_credential(args.host, args.credential_file) for schema in schema_names: attr_config = AttrConfig(args.host, args.catalog, args.config_file, credentials, args.verbose or args.debug, schema, table_name) attr_config.set_attributes() if not args.dryrun: attr_config.apply_annotations()
def suite_setup(self):
    # Create the catalog.
    server = DerivaServer('https', self._hostname, credentials=get_credential(self._hostname))
    if self._reuse_catalog_id:
        self._ermrest_catalog = server.connect_ermrest(self._reuse_catalog_id)
        # In the event that the last run terminated abruptly and didn't properly teardown.
        self.unit_teardown()
    else:
        self._ermrest_catalog = server.create_ermrest_catalog()
def add_file_to_replicant(dataset_rid, fmap, description=''):
    """
    Upload a tomography file into the object store and register it in the set of
    files associated with a replicate.

    :param dataset_rid: RID of the dataset with which the file should be associated
    :param fmap: tuple of (experiment RID, biosample RID, replicate RID, base filename)
    :param description: text that is used to describe the file that is being uploaded
    :return: None
    """
    credential = get_credential(pbcserver)
    store = HatracStore('https', pbcserver, credentials=credential)
    catalog = ErmrestCatalog('https', pbcserver, 1, credentials=credential)

    (experiment_rid, biosample_rid, replicate_rid, filename) = fmap
    dirname = re.sub('_[0-9]+_pre_rec$', '', filename)
    filename = filename + '.mrc'
    path = '{}/{}'.format(dirname, filename)
    print('Uploading ', path)

    objpath = '/hatrac/commons/data/{}/{}/{}?parents=true'.format(
        dataset_rid, replicate_rid, os.path.basename(filename))
    print('to ', objpath)
    loc = store.put_obj(objpath, path)
    print(loc)

    r = store.head(objpath)
    md5 = r.headers['content-md5']
    byte_count = r.headers['Content-Length']
    submit_time = r.headers['Date']

    file = {
        'dataset': dataset_rid,
        'anatomy': pancreas,
        'device': xray_tomography,
        'equipment_model': 'commons:600:',
        'description': description,
        'url': loc,
        'filename': os.path.basename(filename),
        'file_type': 'commons:601:',
        'byte_count': byte_count,
        'submitted_on': submit_time,
        'md5': md5,
        'replicate': replicate_rid
    }
    print(file)

    pb = catalog.getPathBuilder()
    isa = pb.isa
    tomography_data = isa.tables['xray_tomography_data']
    try:
        tomography_data.insert([file])
    except Exception:
        # The row already exists; update it instead.
        tomography_data.update([file])
    return
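# Usage sketch (illustrative only): all RIDs are placeholders; the last element
# of fmap is the base filename, without the '.mrc' suffix, matching the
# '<name>_<n>_pre_rec' pattern stripped by the re.sub() above.
if __name__ == '__main__':
    add_file_to_replicant('1-882P',
                          ('1-883A', '1-884C', '1-885E', 'tomo_01_pre_rec'),
                          description='Reconstructed X-ray tomogram')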
def setUpClass(cls):
    logger.debug("setUpClass begin")
    credential = os.getenv("DERIVA_PY_TEST_CREDENTIAL") or get_credential(hostname)
    server = DerivaServer('https', hostname, credential)
    cls.catalog = server.create_ermrest_catalog()
    try:
        define_test_schema(cls.catalog)
        populate_test_catalog(cls.catalog)
    except Exception:
        # On failure, delete the catalog and re-raise the exception.
        cls.catalog.delete_ermrest_catalog(really=True)
        raise
    logger.debug("setUpClass done")
def fetch_studies(fileid):
    """
    Get the set of studies associated with a saved cohort analysis.

    :param fileid: RID of the saved analysis data, optionally suffixed with '@<snapshot>'
    :return: tuple of (studyid, restored study list)
    """
    credential = get_credential(synapseserver)
    if '@' in fileid:
        [fileid, snaptime] = fileid.split('@')
        catalog = ErmrestSnapshot('https', synapseserver, 1, snaptime, credentials=credential)
    else:
        catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)
        catalog = catalog.latest_snapshot()
        snaptime = catalog.snaptime
    hatrac = HatracStore('https', synapseserver, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse

    # Let's get some shortcuts for awkward table names.
    cohort_table = zebrafish.tables['Cohort Analysis']
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    collection = synapse.tables['Collection']

    # Now get the studyid associated with this file....
    studyid = collection.filter(collection.RID == fileid).link(collection_table).entities()[0]['Cohort Analysis']
    path = cohort_table.alias('studyset').link(zebrafish.tables['Cohort Analysis_Collection']).link(collection)
    path = path.filter(path.studyset.RID == studyid)

    fileentity = collection.filter(collection.RID == fileid).entities()[0]
    file = fileentity['URL']
    print('File description: {}'.format(fileentity['Description']))

    try:
        # Get a path for a temporary file to store the results.
        tmpfile = os.path.join(tempfile.mkdtemp(), 'pairs-dump.pkl')
        hatrac.get_obj(file, destfilename=tmpfile)
        with open(tmpfile, 'rb') as fo:
            slist = pickle.load(fo)
    finally:
        shutil.rmtree(os.path.dirname(tmpfile))

    print('Restored {0} studies from {1}'.format(len(slist['Studies']), studyid))
    return studyid, slist
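# Usage sketch (illustrative only): '1-X8RJ' is a placeholder RID for a saved
# analysis file; appending '@<snapshot>' pins the read to that catalog version.
if __name__ == '__main__':
    studyid, slist = fetch_studies('1-X8RJ')
    print('Study set {0} holds {1} studies'.format(studyid, len(slist['Studies'])))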
def create_online_client(uri):
    '''
    Create a client to access the public CFDE Deriva Catalog.
    URI in the form: ${protocol}://${hostname}/chaise/recordset/#${record_number}/
    '''
    import re
    from urllib.parse import urlparse
    from deriva.core import ErmrestCatalog, get_credential

    uri_parsed = urlparse(uri)
    catalog_number = int(re.match(r'^(\d+)/', uri_parsed.fragment).group(1))
    credential = get_credential(uri_parsed.hostname)
    catalog = ErmrestCatalog(uri_parsed.scheme, uri_parsed.hostname, catalog_number, credential)
    pb = catalog.getPathBuilder()
    CFDE = pb.schemas['CFDE']
    return CFDE
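# Usage sketch (illustrative only): the recordset URI is a placeholder, and
# 'file' is assumed to be a table in the CFDE schema.
if __name__ == '__main__':
    CFDE = create_online_client('https://app.nih-cfde.org/chaise/recordset/#1/CFDE:file')
    print(CFDE.tables['file'].entities()[0])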
def __init__(self, values, connect_to_ermrest=True):
    self.attrs = [
        "database", "hatrac_parent", "species_schema", "species_table",
        "chromosome_schema", "chromosome_table", "gene_type_schema", "gene_type_table",
        "gene_schema", "gene_table", "dbxref_schema", "dbxref_table",
        "catalog_id", "host", "curie_prefix", "source_file_schema", "source_file_table",
        "ontology_schema", "ontology_table", "scratch_db", "scratch_directory"
    ]
    config_file = None
    if isinstance(values, argparse.Namespace):
        if hasattr(values, 'config_file'):
            config_file = values.config_file
        for attr in self.attrs:
            if hasattr(values, attr):
                setattr(self, attr, getattr(values, attr))
            else:
                setattr(self, attr, None)
    else:
        config_file = values.get('config_file')
        for attr in self.attrs:
            setattr(self, attr, values.get(attr))

    if config_file:
        with open(config_file, "r") as file:
            defaults = json.load(file)
        for attr in self.attrs:
            if defaults.get(attr) and not getattr(self, attr):
                setattr(self, attr, defaults[attr])

    shell_attrs = []
    for attr in self.attrs:
        val = getattr(self, attr)
        if val:
            shell_attrs.append('export {attr}="{val}"'.format(attr=attr, val=val))
    self.shell_attr_string = ';'.join(shell_attrs)

    self.pb = None
    if connect_to_ermrest:
        self.credential = get_credential(self.host)
        server = DerivaServer('https', self.host, self.credential)
        catalog = server.connect_ermrest(self.catalog_id)
        self.model = catalog.getCatalogModel()
        self.pb = catalog.getPathBuilder()
def __init__(self, server, output_dir=None, kwargs=None, config=None, config_file=None,
             credentials=None, credential_file=None):
    self.server = server
    self.hostname = None
    self.output_dir = output_dir if output_dir else "."
    self.envars = kwargs if kwargs else dict()
    self.catalog = None
    self.store = None
    self.config = config
    self.cancelled = False
    self.credentials = credentials if credentials else dict()
    self.metadata = dict()
    self.sessions = dict()

    info = "%s v%s [Python %s, %s]" % (
        self.__class__.__name__, VERSION, platform.python_version(), platform.platform(aliased=True))
    logging.info("Initializing downloader: %s" % info)

    if not self.server:
        raise RuntimeError("Server not specified!")

    # Server variable initialization.
    self.hostname = self.server.get('host', '')
    if not self.hostname:
        raise RuntimeError("Host not specified!")
    protocol = self.server.get('protocol', 'https')
    self.server_url = protocol + "://" + self.hostname
    catalog_id = self.server.get("catalog_id", "1")
    session_config = self.server.get('session')

    # Credential initialization.
    if credential_file:
        self.credentials = get_credential(self.hostname, credential_file)

    # Catalog and file store initialization.
    if self.catalog:
        del self.catalog
    self.catalog = ErmrestCatalog(protocol, self.hostname, catalog_id, self.credentials,
                                  session_config=session_config)
    if self.store:
        del self.store
    self.store = HatracStore(protocol, self.hostname, self.credentials,
                             session_config=session_config)

    # Process the config file.
    if config_file and os.path.isfile(config_file):
        self.config = read_config(config_file)
def create_catalog(server='demo.derivacloud.org', catalog_name='test'):
    global catalog_id
    credentials = get_credential(server)
    catalog = DerivaServer('https', server, credentials=credentials).create_ermrest_catalog()
    catalog_id = catalog.catalog_id
    logger.info('Catalog_id is {}'.format(catalog_id))

    logger.info('Configuring catalog....')
    catalog = DerivaCatalogConfigure(server, catalog_id=catalog_id)
    catalog.configure_baseline_catalog(catalog_name=catalog_name,
                                       admin='DERIVA Demo Admin',
                                       curator='DERIVA Demo Curator',
                                       writer='DERIVA Demo Writer',
                                       reader='*')
    return catalog
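# Usage sketch (illustrative only): the server name is a placeholder for a host
# where the caller is allowed to create ERMrest catalogs.
if __name__ == '__main__':
    demo_catalog = create_catalog(server='demo.derivacloud.org', catalog_name='demo')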
def main():
    description = 'DERIVA Command line tool'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--server', help="Hatrac server")
    subparsers = parser.add_subparsers()

    # Create the parser for the "list" command.
    parser_list = subparsers.add_parser('list', aliases=['ls'])
    parser_list.add_argument('path', nargs='?', default='')
    parser_list.set_defaults(func=hatrac_list)

    # Create the parser for the "mkdir" command.
    parser_namespace = subparsers.add_parser('mkdir')
    parser_namespace.add_argument('path')
    parser_namespace.add_argument('-p', default=True)
    parser_namespace.set_defaults(func=hatrac_namespace)

    # Copy a file to a local directory.
    parser_copy = subparsers.add_parser('copy')
    parser_copy.add_argument('path1')
    parser_copy.add_argument('path2')
    parser_copy.set_defaults(func=hatrac_copy)

    # Parse the args and call whatever function was selected.
    args = parser.parse_args()

    urlparts = urlsplit(args.path, scheme='http')
    host = args.server if args.server else urlparts.netloc
    if not host:
        print('Hatrac server name required')
        return

    if args.server:
        # Normalize the path so it is rooted at /hatrac.
        args.path = args.path.replace('/hatrac', '')
        if not args.path.startswith('/'):
            args.path = '/' + args.path
        args.path = '/hatrac' + args.path
    elif args.path == '/hatrac':
        # Missing trailing slash.
        args.path = '/hatrac/'

    credential = get_credential(host)
    args.catalog = HatracStore(urlparts.scheme, host, credentials=credential)
    args.func(args)
def main(): cli = ConfigBaseCLI("annotation dump tool", None, version=MY_VERSION) cli.parser.add_argument('-l', help="list tags encountered", action="store_true") args = cli.parse_cli() managed_attrs = None if args.config_file is None: print("No config file specified") return 1 if args.host is None: print("No host specified") return 1 config = json.load(open(args.config_file)) credentials = get_credential(args.host, args.credential_file) annotations = Annotations(args.host, args.catalog, credentials, config) if args.l: for t in annotations.types_list(): print(t) if not args.l: print(annotations.dumps()) return 0
def main(): parser = argparse.ArgumentParser(description="Configure an Ermrest Catalog") parser.add_argument('server', help='Catalog server name') parser.add_argument('--catalog-id', default=1, help="ID number of desired catalog (Default:1)") parser.add_argument('--catalog-name', default=None, help="Name of catalog (Default:hostname)") parser.add_argument("--catalog", action='store_true', help='Configure a catalog') parser.add_argument("--schema", help='Name of schema to configure'), parser.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME', help='Name of table to be configured') parser.add_argument('--set-policy', default='True', choices=[True, False], help='Access control policy to be applied to catalog or table') parser.add_argument('--reader-group', dest='reader', default=None, help='Group name to use for readers. For a catalog named "foo" defaults for foo-reader') parser.add_argument('--writer-group', dest='writer', default=None, help='Group name to use for writers. For a catalog named "foo" defaults for foo-writer') parser.add_argument('--curator-group', dest='curator', default=None, help='Group name to use for readers. For a catalog named "foo" defaults for foo-curator') parser.add_argument('--admin-group', dest='admin', default=None, help='Group name to use for readers. For a catalog named "foo" defaults for foo-admin') parser.add_argument('--publish', default=False, action='store_true', help='Make the catalog or table accessible for reading without logging in') args = parser.parse_args() credentials = get_credential(args.server) catalog = ErmrestCatalog('https', args.server, args.catalog_id, credentials=credentials) try: if args.catalog: print('Configuring catalog {}:{}'.format(args.server, args.catalog_id)) configure_baseline_catalog(catalog, catalog_name=args.catalog_name, reader=args.reader, writer=args.writer, curator=args.curator, admin=args.admin, set_policy=args.set_policy, anonymous=args.publish) if args.table: [schema_name, table_name] = args.table.split(':') table = catalog.getCatalogModel().schemas[schema_name].tables[table_name] configure_table_defaults(catalog, table, set_policy=args.set_policy, anonymous=args.publish) except DerivaConfigError as e: print(e.msg) return
def main():
    parser = argparse.ArgumentParser(description="Configure an Ermrest Catalog")
    parser.add_argument('server', help='Catalog server name')
    parser.add_argument('--catalog-id', default=1, help="ID number of desired catalog (Default:1)")
    parser.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME',
                        help='Name of table to be configured')
    parser.add_argument('--asset-table', default=None, metavar='KEY_COLUMN',
                        help='Create an asset table linked to table on key_column')
    parser.add_argument('--config', default=None,
                        help='python script to set up configuration variables')
    args = parser.parse_args()

    credentials = get_credential(args.server)
    catalog = ErmrestCatalog('https', args.server, args.catalog_id, credentials=credentials)

    if args.table:
        [schema_name, table_name] = args.table.split(':')
        table = catalog.getCatalogModel().schemas[schema_name].tables[table_name]

    if args.asset_table:
        if not args.table:
            print('Creating asset table requires specification of a table')
            exit(1)
        create_asset_table(catalog, table, args.asset_table)
def process(self):
    target_url_param = "target_url"
    target_url = self.parameters.get(target_url_param)
    if not target_url:
        raise DerivaDownloadConfigurationError(
            "%s is missing required parameter '%s' from %s" %
            (self.__class__.__name__, target_url_param, PROCESSOR_PARAMS_KEY))
    if self.envars:
        target_url = target_url.format(**self.envars)
    target_url = target_url.strip(" ")

    upr = urlsplit(target_url, "https")
    self.scheme = upr.scheme.lower()
    self.netloc = upr.netloc
    self.path = upr.path.strip("/")
    host = urlunsplit((self.scheme, upr.netloc, "", "", ""))

    creds = get_credential(host)
    if not creds:
        logging.info("Unable to locate credential entry for: %s" % host)
    self.credentials = creds or dict()

    return self.outputs
def init_variables(catalog_num=1):
    server = 'pbcconsortium.isrd.isi.edu'
    credential = get_credential(server)
    catalog = ErmrestCatalog('https', server, catalog_num, credentials=credential)
    model_root = catalog.getCatalogModel()

    __main__.catalog = catalog
    __main__.model_root = model_root

    # Get references to main tables for manipulating the model.
    __main__.Experiment = model_root.table('Beta_Cell', 'Experiment')
    __main__.Specimen = model_root.table('Beta_Cell', 'Specimen')
    __main__.Biosample = model_root.table('Beta_Cell', 'Biosample')
    __main__.Dataset = model_root.table('Beta_Cell', 'Dataset')
    __main__.imaging_data = model_root.table('isa', 'imaging_data')
    __main__.model = model_root.table("viz", 'model')

    # Get references to the main tables for managing their contents using the DataPath library.
    pb = catalog.getPathBuilder()

    # Get the main schemas.
    isa = pb.isa
    viz = pb.viz
    vocab = pb.vocab
    Beta_Cell = pb.Beta_Cell
    __main__.pb = pb
    __main__.isa = isa
    __main__.vocab = vocab

    # Get the tables....
    __main__.Experiment_dp = Beta_Cell.Experiment
    __main__.Biosample_dp = Beta_Cell.Biosample
    __main__.dataset_dp = Beta_Cell.Dataset
    __main__.XRay_Tomography_dp = Beta_Cell.XRay_Tomography_Data
    __main__.Specimen_dp = Beta_Cell.Specimen
    __main__.model_dp = viz.model
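# Usage sketch (illustrative only): after init_variables() runs, the shortcuts
# live on the interpreter's __main__ module (e.g. in an interactive session).
if __name__ == '__main__':
    init_variables(catalog_num=1)
    print(__main__.Experiment_dp.entities()[0])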
def __init__(self, scheme='https', servername='app.nih-cfde.org', catalog='registry',
             credentials=None, session_config=None):
    """Bind to the specified registry.

    Note: this binding operates as an authenticated client identity and may
    expose different capabilities depending on the client's role within the
    organization.
    """
    if credentials is None:
        credentials = get_credential(servername)
    if not session_config:
        session_config = DEFAULT_SESSION_CONFIG.copy()
    session_config["allow_retry_on_all_methods"] = True
    self._catalog = ErmrestCatalog(scheme, servername, catalog, credentials,
                                   session_config=session_config)
    self._builder = self._catalog.getPathBuilder()
from deriva.core import HatracStore, ErmrestCatalog, get_credential, DerivaPathError
import deriva.core.ermrest_model as em
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('hostname')
parser.add_argument('catalog_number')
parser.add_argument('schema_name')
args = parser.parse_args()

hostname = args.hostname
schema_name = args.schema_name
catalog_number = args.catalog_number

term_table = 'Step_Type'
term_comment = ''

credential = get_credential(hostname)
catalog = ErmrestCatalog('https', hostname, catalog_number, credentials=credential)


def create_vocabulary_table(catalog, term_table, term_comment):
    model_root = catalog.getCatalogModel()
    new_vocab_table = \
        model_root.schemas[schema_name].create_table(
            catalog,
            em.Table.define_vocabulary(term_table, 'CORE:{RID}', comment=term_comment))


create_vocabulary_table(catalog, term_table, term_comment)
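# Example invocation (illustrative only; the script name is hypothetical):
#   python create_vocabulary_table.py myhost.example.org 1 Core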
key_defs = [
    em.Key.define(['RID'],
                  constraint_names=[('isa', 'BellCellStatus_RIDkey1')],
                  ),
]

fkey_defs = []

table_def = em.Table.define(
    table_name,
    column_defs=column_defs,
    key_defs=key_defs,
    fkey_defs=fkey_defs,
    annotations=table_annotations,
    acls=table_acls,
    acl_bindings=table_acl_bindings,
    comment=table_comment,
    provide_system=True
)


def main(catalog, mode, replace=False):
    updater = CatalogUpdater(catalog)
    updater.update_table(mode, schema_name, table_def, replace=replace)


if __name__ == "__main__":
    host = 'pbcconsortium.isrd.isi.edu'
    catalog_id = 1
    mode, replace, host, catalog_id = parse_args(host, catalog_id, is_table=True)
    credential = get_credential(host)
    catalog = ErmrestCatalog('https', host, catalog_id, credentials=credential)
    main(catalog, mode, replace)
utils.set_foreign_key_acls(catalog, 'PDB', 'ihm_hdx_restraint',
                           'ihm_hdx_restraint_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'ihm_derived_angle_restraint',
                           'ihm_derived_angle_restraint_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'ihm_derived_dihedral_restraint',
                           'ihm_derived_dihedral_restraint_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref',
                           'struct_ref_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref',
                           'struct_ref_db_name_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq',
                           'struct_ref_seq_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq_dif',
                           'struct_ref_seq_dif_structure_id_fkey', foreign_key_acls)
utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq_dif',
                           'struct_ref_seq_dif_details_fkey', foreign_key_acls)


if __name__ == '__main__':
    args = BaseCLI("ad-hoc table creation tool", None, 1).parse_cli()
    credentials = get_credential(args.host, args.credential_file)
    main(args.host, 99, credentials)
    fkey_defs=fkey_defs,
    annotations=table_annotations,
    acls=table_acls,
    acl_bindings=table_acl_bindings,
    comment=table_comment,
    provide_system=True)


def main(catalog, mode, replace=False, really=False):
    updater = CatalogUpdater(catalog)
    table_def['column_annotations'] = column_annotations
    table_def['column_comment'] = column_comment
    updater.update_table(mode, schema_name, table_def, replace=replace, really=really)


if __name__ == "__main__":
    host = 'pdb.isrd.isi.edu'
    catalog_id = 99
    mode, replace, host, catalog_id = parse_args(host, catalog_id, is_table=True)
    catalog = ErmrestCatalog('https', host, catalog_id=catalog_id, credentials=get_credential(host))
    main(catalog, mode, replace)
def __init__(self, server, **kwargs):
    self.server = server
    self.hostname = None
    self.catalog = None
    self.store = None
    self.cancelled = False
    self.output_dir = os.path.abspath(kwargs.get("output_dir", "."))
    self.envars = kwargs.get("envars", dict())
    self.config = kwargs.get("config")
    self.credentials = kwargs.get("credentials", dict())
    config_file = kwargs.get("config_file")
    credential_file = kwargs.get("credential_file")
    self.metadata = dict()
    self.sessions = dict()

    info = "%s v%s [Python %s, %s]" % (
        self.__class__.__name__, get_installed_version(VERSION),
        platform.python_version(), platform.platform(aliased=True))
    logging.info("Initializing downloader: %s" % info)

    if not self.server:
        raise DerivaDownloadConfigurationError("Server not specified!")

    # Server variable initialization.
    self.hostname = self.server.get('host', '')
    if not self.hostname:
        raise DerivaDownloadConfigurationError("Host not specified!")
    protocol = self.server.get('protocol', 'https')
    self.server_url = protocol + "://" + self.hostname
    catalog_id = self.server.get("catalog_id", "1")
    session_config = self.server.get('session')

    # Credential initialization.
    token = kwargs.get("token")
    oauth2_token = kwargs.get("oauth2_token")
    username = kwargs.get("username")
    password = kwargs.get("password")
    if credential_file:
        self.credentials = get_credential(self.hostname, credential_file)
    elif token or oauth2_token or (username and password):
        self.credentials = format_credential(token=token,
                                             oauth2_token=oauth2_token,
                                             username=username,
                                             password=password)

    # Catalog and file store initialization.
    if self.catalog:
        del self.catalog
    self.catalog = ErmrestCatalog(protocol, self.hostname, catalog_id, self.credentials,
                                  session_config=session_config)
    if self.store:
        del self.store
    self.store = HatracStore(protocol, self.hostname, self.credentials,
                             session_config=session_config)

    # Init dcctx cid to a default.
    self.set_dcctx_cid(self.__class__.__name__)

    # Process the config file.
    if config_file:
        try:
            self.config = read_config(config_file)
        except Exception as e:
            raise DerivaDownloadConfigurationError(e)
def download(self, **kwargs):
    if not self.config:
        raise DerivaDownloadConfigurationError("No configuration specified!")

    if self.config.get("catalog") is None:
        raise DerivaDownloadConfigurationError("Catalog configuration error!")

    ro_manifest = None
    ro_author_name = None
    ro_author_orcid = None
    remote_file_manifest = os.path.abspath(
        ''.join([os.path.join(self.output_dir, 'remote-file-manifest_'), str(uuid.uuid4()), ".json"]))

    catalog_config = self.config['catalog']
    self.envars.update(self.config.get('env', dict()))
    self.envars.update({"hostname": self.hostname})

    # 1. If we don't have a client identity, we need to authenticate
    identity = kwargs.get("identity")
    if not identity:
        try:
            if not self.credentials:
                self.set_credentials(get_credential(self.hostname))
            logging.info("Validating credentials for host: %s" % self.hostname)
            attributes = self.catalog.get_authn_session().json()
            identity = attributes["client"]
        except HTTPError as he:
            if he.response.status_code == 404:
                logging.info("No existing login session found for host: %s" % self.hostname)
        except Exception as e:
            raise DerivaDownloadAuthenticationError("Unable to validate credentials: %s" % format_exception(e))
    wallet = kwargs.get("wallet", {})

    # 2. Check for bagging config and initialize bag related variables
    bag_path = None
    bag_archiver = None
    bag_algorithms = None
    bag_config = self.config.get('bag')
    create_bag = True if bag_config else False
    if create_bag:
        bag_name = bag_config.get(
            'bag_name',
            ''.join(["deriva_bag", '_', time.strftime("%Y-%m-%d_%H.%M.%S")])).format(**self.envars)
        bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
        bag_archiver = bag_config.get('bag_archiver')
        bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
        bag_metadata = bag_config.get(
            'bag_metadata', {"Internal-Sender-Identifier": "deriva@%s" % self.server_url})
    bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
    if create_bag:
        bdb.ensure_bag_path_exists(bag_path)
        bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata)
        if bag_ro:
            ro_author_name = bag.info.get(
                "Contact-Name",
                None if not identity else identity.get(
                    'full_name', identity.get('display_name', identity.get('id', None))))
            ro_author_orcid = bag.info.get("Contact-Orcid")
            ro_manifest = ro.init_ro_manifest(author_name=ro_author_name, author_orcid=ro_author_orcid)
            bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

    # 3. Process the set of queries by locating, instantiating, and invoking the specified processor(s)
    outputs = dict()
    base_path = bag_path if bag_path else self.output_dir
    for processor in catalog_config['query_processors']:
        processor_name = processor["processor"]
        processor_type = processor.get('processor_type')
        processor_params = processor.get('processor_params')
        try:
            query_processor = find_query_processor(processor_name, processor_type)
            processor = query_processor(self.envars,
                                        inputs=outputs,
                                        bag=create_bag,
                                        catalog=self.catalog,
                                        store=self.store,
                                        base_path=base_path,
                                        processor_params=processor_params,
                                        remote_file_manifest=remote_file_manifest,
                                        ro_manifest=ro_manifest,
                                        ro_author_name=ro_author_name,
                                        ro_author_orcid=ro_author_orcid,
                                        identity=identity,
                                        wallet=wallet)
            outputs = processor.process()
        except Exception as e:
            logging.error(format_exception(e))
            if create_bag:
                bdb.cleanup_bag(bag_path)
            raise

    # 4. Execute anything in the transform processing pipeline, if configured
    transform_processors = self.config.get('transform_processors', [])
    if transform_processors:
        for processor in transform_processors:
            processor_name = processor["processor"]
            processor_type = processor.get('processor_type')
            processor_params = processor.get('processor_params')
            try:
                transform_processor = find_transform_processor(processor_name, processor_type)
                processor = transform_processor(self.envars,
                                                inputs=outputs,
                                                processor_params=processor_params,
                                                base_path=base_path,
                                                bag=create_bag,
                                                ro_manifest=ro_manifest,
                                                ro_author_name=ro_author_name,
                                                ro_author_orcid=ro_author_orcid,
                                                identity=identity,
                                                wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                logging.error(format_exception(e))
                raise

    # 5. Create the bag, and archive (serialize) if necessary
    if create_bag:
        try:
            if ro_manifest:
                ro.write_bag_ro_metadata(ro_manifest, bag_path)
            if not os.path.isfile(remote_file_manifest):
                remote_file_manifest = None
            bdb.make_bag(bag_path,
                         algs=bag_algorithms,
                         remote_file_manifest=remote_file_manifest if
                         (remote_file_manifest and os.path.getsize(remote_file_manifest) > 0) else None,
                         update=True)
        except Exception as e:
            logging.fatal("Exception while updating bag manifests: %s" % format_exception(e))
            bdb.cleanup_bag(bag_path)
            raise
        finally:
            if remote_file_manifest and os.path.isfile(remote_file_manifest):
                os.remove(remote_file_manifest)

        logging.info('Created bag: %s' % bag_path)
        if bag_archiver is not None:
            try:
                archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                bdb.cleanup_bag(bag_path)
                outputs = {os.path.basename(archive): {LOCAL_PATH_KEY: archive}}
            except Exception as e:
                logging.error("Exception while creating data bag archive: %s" % format_exception(e))
                raise
        else:
            outputs = {os.path.basename(bag_path): {LOCAL_PATH_KEY: bag_path}}

    # 6. Execute anything in the post processing pipeline, if configured
    post_processors = self.config.get('post_processors', [])
    if post_processors:
        for processor in post_processors:
            processor_name = processor["processor"]
            processor_type = processor.get('processor_type')
            processor_params = processor.get('processor_params')
            try:
                post_processor = find_post_processor(processor_name, processor_type)
                processor = post_processor(self.envars,
                                           inputs=outputs,
                                           processor_params=processor_params,
                                           identity=identity,
                                           wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                logging.error(format_exception(e))
                raise

    return outputs