def add_file_to_cohort(file, description, cohort):
    """
    Upload a file into a data collection and add that file into the set of files associated with a cohort analysis.

    :param file: local path to the file that should be uploaded and associated with the cohort
    :param description: text that is used to describe the file that is being uploaded
    :param cohort: RID of the analysis cohort to which the file should be associated
    :return: None.
    """
    # NOTE(review): synapseserver is a module-level name not visible in this
    # chunk -- presumably the synapse server hostname; confirm.
    credential = get_credential(synapseserver)
    store = HatracStore('https', synapseserver, credentials=credential)
    catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse

    collection = synapse.tables['Collection']
    # Insert a placeholder record first: we need the server-assigned RID to
    # build the hatrac object path before we can upload the file.
    files = collection.insert([{'Description': description, 'URL':'dummy2'}])
    newfileRID = files[0]['RID']
    print('inserted file into collection {}'.format(newfileRID))
    path = '/hatrac/Data/Data_{0}_{1}'.format(newfileRID, os.path.basename(file))
    loc = store.put_obj(path, file)
    # Patch the placeholder record with the real object location and name.
    files[0]['URL'] = loc
    files[0]['Orig. Basename'] = os.path.basename(file)

    # hatrac reports checksum and size in the HEAD response headers.
    r = store.head(path)
    files[0]['MD5'] = r.headers['content-md5']
    files[0]['#Bytes'] = r.headers['Content-Length']
    files = collection.update(files)

    # Now link into cohort.
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    newfileRID = collection_table.insert([{'Cohort Analysis': cohort, 'Collection': newfileRID}])
    return
def main():
    """Entry point for the annotation rollback command-line tool."""
    command = BaseCLI("annotation rollback tool", None, version=__version__, hostname_required=True)
    command.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
    command.parser.add_argument("--snapshot", metavar="<snapshot ID", help="Catalog snapshot ID. Example: 2QG-VWP6-0YG0")
    parsed = command.parse_cli()
    creds = get_credential(parsed.host, parsed.credential_file)
    rollback_annotation(parsed.host, parsed.catalog, snaptime=parsed.snapshot, credential=creds)
def get_synapses(study):
    """
    Get the synapse data associated with a study.

    The actual data is retrieved from the object store; both the before and
    after CSV files are read into pandas DataFrames when available.

    :param study: dict with 'BeforeURL'/'AfterURL' object-store URLs (either
        may be falsy when missing) plus 'Type', 'Study' and 'Subject' values.
    :return: dict with 'Before'/'After' DataFrames (None when the image is
        missing) and the study's 'Type', 'Study' and 'Subject'.
    """
    credential = get_credential(synapseserver)
    objectstore = HatracStore('https', synapseserver, credentials=credential)

    # Get a path for a temporary file to hold hatrac download results.
    path = os.path.join(tempfile.mkdtemp(), 'image')
    try:
        # Get the before image from hatrac, be careful in case it's missing.
        if study['BeforeURL']:
            objectstore.get_obj(study['BeforeURL'], destfilename=path)
            img1 = pd.read_csv(path)
            # The first row carries metadata rather than synapse data -- drop
            # it.  (Was wrapped in a dead 'if True:' conditional.)
            img1.drop(img1.index[0], inplace=True)
        else:
            img1 = None

        # Get the after image from hatrac, be careful in case it's missing.
        if study['AfterURL']:
            objectstore.get_obj(study['AfterURL'], destfilename=path)
            img2 = pd.read_csv(path)
            img2.drop(img2.index[0], inplace=True)
        else:
            img2 = None
    finally:
        # Always remove the scratch directory, even on failure.
        shutil.rmtree(os.path.dirname(path))
    return {'Before': img1, 'After': img2, 'Type': study['Type'], 'Study': study['Study'], 'Subject': study['Subject']}
def get_studies(studyid):
    """Fetch the synapse studies for *studyid* and compute image alignments.

    :param studyid: study identifier; a '@' in it selects a pinned catalog
        snapshot rather than the latest one.
    :return: dict with 'StudyID', the annotated list of study entities, and a
        'Provenence' record (git hash + catalog snapshot).
    """
    credential = get_credential(synapseserver)
    # An embedded '@' means the caller pinned a specific catalog snapshot.
    if '@' in studyid:
        ermrest_catalog = ErmrestSnapshot('https', synapseserver, 1, credential)
    else:
        ermrest_catalog = ErmrestCatalog('https', synapseserver, 1, credential).latest_snapshot()

    githash = git_version()
    ermrest_snapshot = ermrest_catalog.snaptime

    # Get the current list of studies from the server.
    study_entities = get_synapse_studies(ermrest_catalog,studyid)

    print('Identified %d studies' % len(study_entities))

    # Map protocol RIDs to human-readable study types.
    protocol_types = {
        'PrcDsy20160101A': 'aversion',
        'PrcDsy20170613A': 'conditioned-control',
        'PrcDsy20170615A': 'unconditioned-control',
        'PrcDsy20170613B': 'fullcycle-control',
        'PrcDsy20171030A': 'groundtruth-control',
        'PrcDsy20171030B': 'interval-groundtruth-control'
    }

    # Compute the alignment for each study, and fill in some useful values.
    for i in study_entities:
        i['Paired'] = False
        # Aversion studies are subdivided by whether the subject learned.
        if protocol_types[i['Protocol']] == 'aversion':
            if i['Learner'] is True:
                i['Type'] = 'learner'
            else:
                i['Type'] = 'nonlearner'
        else:
            i['Type'] = protocol_types[i['Protocol']]

        try:
            i['Aligned'] = False
            # NOTE(review): 'Provenence'/'CatlogVersion' are misspelled but are
            # live dict keys presumably consumed elsewhere -- do not rename.
            i['Provenence'] = {'GITHash': githash, 'CatlogVersion': ermrest_snapshot}
            i['StudyID'] = studyid
            i['Alignment'] = ImageGrossAlignment.from_image_id(ermrest_catalog, i['BeforeImageID'])
            p = pd.DataFrame([i[pt] for pt in ['AlignP0', 'AlignP1', 'AlignP2']])
            # Scale the three alignment points by the per-axis factors
            # (0.4 z, 0.26 x/y -- presumably voxel pitch; TODO confirm units).
            p = p.multiply(pd.DataFrame([{'z': 0.4, 'y': 0.26, 'x': 0.26}]*3))

            i['StudyAlignmentPts'] = pd.DataFrame(transform_points(i['Alignment'].M_canonical, p.loc[:,['x','y','z']]),
                                                  columns=['x', 'y', 'z'])
#            i['StudyAlignmentPts'] = pd.DataFrame(transform_points(i['Alignment'].M, p.loc[:,['x','y','z']]),
#                                                columns=['x', 'y', 'z'])
            i['Aligned'] = True
            i['AlignmentPts'] = dict()
        except ValueError:  # Alignments missing....
            print('Alingment missing for study: {0}'.format(i['Study']))
            continue
        except NotImplementedError:
            print('Alignment Code Failed for study: {0}'.format(i['Study']))
            continue

    return {'StudyID': studyid,
            'Studies': list(study_entities),
            'Provenence': {'GITHash': githash, 'CatlogVersion': ermrest_snapshot}
            }
# Example #5
# 0
def main():
    """Apply ACL configuration to each requested schema (and optional table)."""
    cli = AclCLI()
    args = cli.parse_cli()
    table = cli.get_table_arg(args)
    schemas = cli.get_schema_arg_list(args)
    creds = get_credential(args.host, args.credential_file)
    verbose = args.verbose or args.debug
    # Groups only need to be saved once, and never on a dry run.
    need_group_save = not (args.dryrun or args.omit_groups)
    for schema_name in schemas:
        config = AclConfig(args.host,
                           args.catalog,
                           args.config_file,
                           creds,
                           schema_name=schema_name,
                           table_name=table,
                           verbose=verbose)

        try:
            if need_group_save:
                config.save_groups()
                need_group_save = False
            if not args.groups_only:
                config.set_acls()
                if not args.dryrun:
                    config.apply_acls()
        except HTTPError as err:
            print(format_exception(err))
            raise

        if args.dryrun:
            print(config.dumps())
    def setUp(self):
        """Create a temp CSV fixture and a fresh ermrest catalog/schema per test."""
        self.server = 'dev.isrd.isi.edu'
        self.credentials = get_credential(self.server)
        self.catalog_id = None
        self.schema_name = 'TestSchema'
        self.table_name = 'TestTable'

        self.table_size = 100
        self.column_count = 20
        self.test_dir = tempfile.mkdtemp()

        # generate_test_csv presumably yields a row iterator and the header
        # list -- TODO confirm its return shape.
        (row, self.headers) = generate_test_csv(self.column_count)
        self.tablefile = '{}/{}.csv'.format(self.test_dir, self.table_name)

        with open(self.tablefile, 'w', newline='') as f:
            tablewriter = csv.writer(f)
            # table_size + 1 rows are written, presumably to include a header
            # row from the generator.
            for i, j in zip(range(self.table_size + 1), row):
                tablewriter.writerow(j)

        self.configfile = os.path.dirname(os.path.realpath(__file__)) + '/config.py'
        # TempErmrestCatalog cleans the catalog up automatically at teardown.
        self.catalog = TempErmrestCatalog('https', self.server, credentials=self.credentials)

        model = self.catalog.getCatalogModel()
        model.create_schema(self.catalog, em.Schema.define(self.schema_name))

        self.table = DerivaCSV(self.tablefile, self.schema_name, column_map=True, key_columns='id')
      #  self._create_test_table()
        self.table.create_validate_upload_csv(self.catalog, create=True, upload=True)
        logger.debug('Setup done....')
# Example #7
# 0
def synapses_to_bag(study_list, dest, protocol_types, bag_metadata=None, publish=False):
    """
    Export all of the synapse data for every study in the study list.
    Also output a CSV file that contains an index of all of the data.

    The data index is: StudyID, SubjectID, Study Type, FileNames for Before and After synapses.

    :param study_list: iterable of study dicts (keys include 'Study',
        'Subject', 'Protocol', 'Learner', 'BeforeURL', 'AfterURL').
    :param dest: directory under which the bag is assembled.
    :param protocol_types: maps protocol RID -> human-readable study type.
    :param bag_metadata: optional metadata dict passed to bdbag.
    :param publish: when True, upload the zipped bag to hatrac and return its
        location; otherwise return the local archive path.
    """

    bag_metadata = bag_metadata if bag_metadata else {}

    credential = get_credential("synapse.isrd.isi.edu")
    objectstore = HatracStore('https', 'synapse.isrd.isi.edu', credentials=credential)

    # The bag tooling works relative to the CWD, so remember and restore it.
    current_dir = os.getcwd()
    try:
        os.chdir(dest)

        # Create an output directory for synapse files.
        os.makedirs('synapse-studies', mode=0o777, exist_ok=True)
        os.chdir('synapse-studies')
        dumpdir = os.getcwd()

        for study in study_list:
            copy_synapse_files(objectstore, study)

        # Now write out the CSV file with the list of studies...
        with open('studies.csv', 'w', newline='') as csvfile:
            synapsewriter = csv.writer(csvfile)

            # Write out header....
            synapsewriter.writerow(['Study', 'Subject', 'Type', 'Learner', 'Before', 'After'])
            for study in study_list:
                study_type = protocol_types[study['Protocol']]
                url1 = study['BeforeURL']
                url2 = study['AfterURL']

                # Strip the hatrac version suffix (after ':') to recover the
                # bare object name.
                filename1 = filename2 = ''
                if url1:
                    filename1 = (os.path.basename(url1.split(':')[0]))
                if url2:
                    filename2 = (os.path.basename(url2.split(':')[0]))

                synapsewriter.writerow([study['Study'], study['Subject'], study_type, study['Learner'],
                                        filename1, filename2])

        bdb.make_bag(dumpdir, metadata=bag_metadata)
        archivefile = bdb.archive_bag(dumpdir, 'zip')

        if publish:
            # Publishing uploads to the dev server and returns the hatrac URL.
            bagstore = HatracStore('https', 'synapse-dev.isrd.isi.edu', credentials=credential)
            hatrac_path = '/hatrac/Data/synapse-{0}'.format(bag_metadata['ERMRest-Snapshot'])
            return bagstore.put_obj(hatrac_path, archivefile)
    finally:
        os.chdir(current_dir)
    return archivefile
# Example #8
# 0
def _require_host(args, need_endpoint=False):
    """Return the validated hostname from *args*, or raise ValueError.

    :param args: positional CLI arguments; args[0] must be a known host.
    :param need_endpoint: also require the host to have a GCS endpoint entry.
    """
    if len(args) > 0 and args[0] in HOST_TO_GCS_SCOPES.keys():
        if not need_endpoint or args[0] in HOST_TO_GCS_ENDPOINTS.keys():
            return args[0]
    raise ValueError("Expected hostname, one of the following: %s" %
                     list(HOST_TO_GCS_SCOPES.keys()))


def main(subcommand, *args):
    """Dispatch the CFDE auth sub-commands: login, logout, headers, credential.

    :param subcommand: one of 'login', 'logout', 'headers', 'credential'.
    :param args: sub-command arguments; args[0] is the target hostname.
    :raises ValueError: on an unknown sub-command or unrecognized hostname.
    """
    bootstrap()
    if subcommand == 'login':
        host = _require_host(args)
        scope = HOST_TO_GCS_SCOPES[host]
        gnl = GlobusNativeLogin()
        tokens = gnl.login(no_browser=True,
                           no_local_server=True,
                           requested_scopes=(scope, CFDE_DERIVA_SCOPE))
        # NOTE(review): the access token is fetched but never used -- confirm
        # whether this lookup is needed for its side effects.
        access_token = gnl.find_access_token_for_scope(scope, tokens)
        print('Logged into host "%s" with scope: %s' % (host, scope))
    elif subcommand == 'logout':
        host = _require_host(args)
        scope = HOST_TO_GCS_SCOPES[host]
        gnl = GlobusNativeLogin()
        gnl.logout(requested_scopes=(scope, CFDE_DERIVA_SCOPE))
        print('Logged out of host "%s" with scope: %s' % (host, scope))
    elif subcommand == 'headers':
        host = _require_host(args, need_endpoint=True)
        url = HOST_TO_GCS_ENDPOINTS[host]
        headers = get_archive_headers_map(host)
        if headers:
            print('Header map for "%s" (%s):\n%s' % (host, url, headers))
        else:
            print('Login required for host: "%s"' % host)
    elif subcommand == 'credential':
        host = _require_host(args, need_endpoint=True)
        url = HOST_TO_GCS_ENDPOINTS[host]
        credential = get_credential(host)
        if credential:
            print('Credential for "%s" (%s):\n%s' %
                  (host, url, credential))
        else:
            print('Login required for host: "%s"' % host)
    else:
        raise ValueError('unknown sub-command "%s"' % subcommand)
# Example #9
# 0
def main():
    """Apply annotation configuration to the requested schemas and table."""
    cli = ConfigBaseCLI("annotation config tool", None, version=MY_VERSION)
    args = cli.parse_cli()
    target_table = cli.get_table_arg(args)
    credentials = get_credential(args.host, args.credential_file)
    verbose = args.verbose or args.debug
    for schema_name in cli.get_schema_arg_list(args):
        config = AttrConfig(args.host, args.catalog, args.config_file, credentials,
                            verbose, schema_name, target_table)
        config.set_attributes()
        if not args.dryrun:
            config.apply_annotations()
# Example #10
# 0
 def suite_setup(self):
     """Connect to an existing ermrest catalog (reusing its id) or create one."""
     # create catalog
     server = DerivaServer('https',
                           self._hostname,
                           credentials=get_credential(self._hostname))
     if self._reuse_catalog_id:
         self._ermrest_catalog = server.connect_ermrest(
             self._reuse_catalog_id)
         self.unit_teardown(
         )  # in the event that the last run terminated abruptly and didn't properly teardown
     else:
         self._ermrest_catalog = server.create_ermrest_catalog()
def add_file_to_replicant(dataset_rid, fmap, description=''):
    """
    Upload a tomography file into hatrac and register it in the
    xray_tomography_data table.

    :param dataset_rid: RID of the dataset the file belongs to.
    :param fmap: tuple (experiment_rid, biosample_rid, replicate_rid, filename)
        locating the file; filename is given without its '.mrc' suffix.
    :param description: text that is used to describe the file being uploaded.
    :return: None.
    """
    credential = get_credential(pbcserver)
    store = HatracStore('https', pbcserver, credentials=credential)
    catalog = ErmrestCatalog('https', pbcserver, 1, credentials=credential)

    (experiment_rid, biosample_rid, replicate_rid, filename) = fmap
    # The local directory name is the filename with its trailing
    # '_<n>_pre_rec' suffix stripped.
    dirname = re.sub('_[0-9]+_pre_rec$', '', filename)
    filename = filename + '.mrc'
    path = '{}/{}'.format(dirname, filename)
    print('Uploading ', path)
    objpath = '/hatrac/commons/data/{}/{}/{}?parents=true'.format(
        dataset_rid, replicate_rid, os.path.basename(filename))
    print('to ', objpath)
    loc = store.put_obj(objpath, path)
    print(loc)
    # hatrac reports checksum, size and date in the HEAD response headers.
    r = store.head(objpath)
    md5 = r.headers['content-md5']
    byte_count = r.headers['Content-Length']
    submit_time = r.headers['Date']

    file = {
        'dataset': dataset_rid,
        'anatomy': pancreas,
        'device': xray_tomography,
        'equipment_model': 'commons:600:',
        'description': description,
        'url': loc,
        'filename': os.path.basename(filename),
        'file_type': 'commons:601:',
        'byte_count': byte_count,
        'submitted_on': submit_time,
        'md5': md5,
        'replicate': replicate_rid
    }
    print(file)

    pb = catalog.getPathBuilder()
    isa = pb.isa

    tomography_data = isa.tables['xray_tomography_data']
    try:
        newrid = tomography_data.insert([file])
    except Exception:
        # Insert fails when the record already exists; fall back to updating.
        # (Was a bare 'except:', which also swallowed KeyboardInterrupt.)
        newrid = tomography_data.update([file])
    return
# Example #12
# 0
 def setUpClass(cls):
     """Create and populate a throwaway ermrest test catalog, once per class."""
     logger.debug("setupUpClass begin")
     # Allow the credential to be injected via the environment (e.g. in CI).
     credential = os.getenv("DERIVA_PY_TEST_CREDENTIAL") or get_credential(
         hostname)
     server = DerivaServer('https', hostname, credential)
     cls.catalog = server.create_ermrest_catalog()
     try:
         define_test_schema(cls.catalog)
         populate_test_catalog(cls.catalog)
     except Exception:
         # on failure, delete catalog and re-raise exception
         cls.catalog.delete_ermrest_catalog(really=True)
         raise
     logger.debug("setupUpClass done")
def fetch_studies(fileid):
    """
    Retrieve a previously saved study list (a pickled analysis dump) from hatrac.

    :param fileid: RID of the saved analysis data; may carry a '@<snaptime>'
        suffix to pin a specific catalog snapshot.
    :return: tuple (studyid, slist) where slist is the unpickled study dump.
    """

    credential = get_credential(synapseserver)

    # An embedded '@' means the caller pinned a catalog snapshot.
    if '@' in fileid:
        [fileid, snaptime] = fileid.split('@')
        catalog = ErmrestSnapshot('https', synapseserver, 1, snaptime, credentials=credential)
    else:
        catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)
        catalog = catalog.latest_snapshot()
        snaptime = catalog.snaptime
    hatrac = HatracStore('https', synapseserver, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse

    # Lets get some shortcuts for awkward table names.
    cohort_table = zebrafish.tables['Cohort Analysis']
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    collection = synapse.tables['Collection']

    # Now get the studyid associated with this file....

    studyid = collection.filter(collection.RID == fileid).link(collection_table).entities()[0]['Cohort Analysis']

    # NOTE(review): 'path' is built but never used below -- looks like dead
    # code; confirm before removing.
    path = cohort_table.alias('studyset').link(zebrafish.tables['Cohort Analysis_Collection']).link(collection)
    path = path.filter(path.studyset.RID == studyid)

    fileentity = collection.filter(collection.RID == fileid).entities()[0]
    file = fileentity['URL']
    print('File description: {}'.format(fileentity['Description']))

    try:
        # Get a path for a temporary file to store  results
        tmpfile = os.path.join(tempfile.mkdtemp(), 'pairs-dump.pkl')
        hatrac.get_obj(file, destfilename=tmpfile)
        # NOTE(review): unpickling data fetched from the object store is only
        # safe if the store contents are trusted.
        with open(tmpfile, 'rb') as fo:
            slist = pickle.load(fo)
    finally:
        shutil.rmtree(os.path.dirname(tmpfile))

    print('Restored {0} studies from {1}'.format(len(slist['Studies']),studyid))
    return studyid, slist
# Example #14
# 0
def create_online_client(uri):
    """Create a DataPath client for the public CFDE DERIVA catalog.

    URI in the form: ${protocol}://${hostname}/chaise/recordset/#${record_number}/
    """
    import re
    from urllib.parse import urlparse
    from deriva.core import ErmrestCatalog, get_credential
    parsed = urlparse(uri)
    # The catalog number is the leading integer of the URL fragment.
    catalog_id = int(re.match(r'^(\d+)/', parsed.fragment).group(1))
    catalog = ErmrestCatalog(parsed.scheme, parsed.hostname, catalog_id,
                             get_credential(parsed.hostname))
    return catalog.getPathBuilder().schemas['CFDE']
# Example #15
# 0
    def __init__(self, values, connect_to_ermrest=True):
        """Build a configuration object from CLI args or a plain dict.

        :param values: an argparse.Namespace or a dict supplying the
            attributes listed in self.attrs, plus an optional 'config_file'
            whose JSON contents provide fallback defaults.
        :param connect_to_ermrest: when True, also open the catalog connection
            and cache its model and path builder on self.
        """
        # The complete set of recognized configuration attributes.
        self.attrs = [
            "database", "hatrac_parent", "species_schema", "species_table",
            "chromosome_schema", "chromosome_table", "gene_type_schema",
            "gene_type_table", "gene_schema", "gene_table", "dbxref_schema",
            "dbxref_table", "catalog_id", "host", "curie_prefix",
            "source_file_schema", "source_file_table", "ontology_schema",
            "ontology_table", "scratch_db", "scratch_directory"
        ]

        config_file = None
        if isinstance(values, argparse.Namespace):
            if hasattr(values, 'config_file'):
                config_file = values.config_file
            for attr in self.attrs:
                # Missing attributes default to None so every attr always exists.
                setattr(self, attr, getattr(values, attr, None))
        else:
            config_file = values.get('config_file')
            for attr in self.attrs:
                setattr(self, attr, values.get(attr))

        if config_file:
            # BUG FIX: the handle was previously opened without a context
            # manager and leaked if json.load raised.
            with open(config_file, "r") as fh:
                defaults = json.load(fh)

            # Config-file values only fill attributes the caller left unset.
            for attr in self.attrs:
                if defaults.get(attr) and not getattr(self, attr):
                    setattr(self, attr, defaults[attr])

        # Export the populated attributes as a shell-sourceable string.
        shell_attrs = []
        for attr in self.attrs:
            val = getattr(self, attr)
            if val:
                shell_attrs.append('export {attr}="{val}"'.format(attr=attr,
                                                                  val=val))
        self.shell_attr_string = ';'.join(shell_attrs)
        self.pb = None
        if connect_to_ermrest:
            self.credential = get_credential(self.host)
            server = DerivaServer('https', self.host, self.credential)
            catalog = server.connect_ermrest(self.catalog_id)
            self.model = catalog.getCatalogModel()
            self.pb = catalog.getPathBuilder()
# Example #16
# 0
    def __init__(self, server,
                 output_dir=None, kwargs=None, config=None, config_file=None, credentials=None, credential_file=None):
        """Initialize the downloader from a server descriptor dict.

        :param server: dict with at least 'host'; optional 'protocol'
            (default 'https'), 'catalog_id' (default '1') and 'session'.
        :param output_dir: where downloads land (default: current directory).
        :param kwargs: extra environment variables for template expansion.
        :param config: pre-parsed configuration object.
        :param config_file: path to a config file; read if it exists.
        :param credentials: credential dict; overridden by credential_file.
        :param credential_file: path used to look up stored credentials.
        :raises RuntimeError: if the server dict or its host is missing.
        """
        self.server = server
        self.hostname = None
        self.output_dir = output_dir if output_dir else "."
        self.envars = kwargs if kwargs else dict()
        self.catalog = None
        self.store = None
        self.config = config
        self.cancelled = False
        self.credentials = credentials if credentials else dict()
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, VERSION, platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise RuntimeError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise RuntimeError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization -- a credential file takes precedence over
        # any credentials passed in directly.
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)

        # catalog and file store initialization; drop any stale handles first.
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(
            protocol, self.hostname, catalog_id, self.credentials, session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(
            protocol, self.hostname, self.credentials, session_config=session_config)

        # process config file
        if config_file and os.path.isfile(config_file):
            self.config = read_config(config_file)
# Example #17
# 0
def create_catalog(server='demo.derivacloud.org', catalog_name='test'):
    """Create and baseline-configure a new demo ermrest catalog.

    :param server: hostname of the DERIVA server to create the catalog on.
    :param catalog_name: display name for the new catalog.
    :return: the configured DerivaCatalogConfigure instance.
    """
    global catalog_id

    credentials = get_credential(server)
    catalog = DerivaServer('https', server,
                           credentials=credentials).create_ermrest_catalog()
    catalog_id = catalog.catalog_id
    logger.info('Catalog_id is {}'.format(catalog_id))

    logger.info('Configuring catalog....')
    # BUG FIX: previously passed an unrelated global 'host' here; configure the
    # catalog on the same server it was just created on.
    catalog = DerivaCatalogConfigure(server, catalog_id=catalog_id)
    catalog.configure_baseline_catalog(catalog_name=catalog_name,
                                       admin='DERIVA Demo Admin',
                                       curator='DERIVA Demo Curator',
                                       writer='DERIVA Demo Writer',
                                       reader='*')
    return catalog
# Example #18
# 0
def main():
    """DERIVA command-line tool for listing, creating and copying hatrac objects."""
    description = 'DERIVA Command line tool'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--server', help="Hatrac server")

    subparsers = parser.add_subparsers()

    # create the parser for the "list" command
    parser_list = subparsers.add_parser('list', aliases=['ls'])
    parser_list.add_argument('path', nargs='?', default='')
    parser_list.set_defaults(func=hatrac_list)

    # create the parser for the "mkdir" command
    parser_namespace = subparsers.add_parser('mkdir')
    parser_namespace.add_argument('path')
    parser_namespace.add_argument('-p', default=True)
    parser_namespace.set_defaults(func=hatrac_namespace)

    # copy file to local directory
    parser_copy = subparsers.add_parser('copy')
    parser_copy.add_argument('path1')
    parser_copy.add_argument('path2')
    parser_copy.set_defaults(func=hatrac_copy)

    # parse the args and call whatever function was selected
    args = parser.parse_args()

    # NOTE(review): the 'copy' subcommand defines path1/path2 rather than
    # 'path', so args.path raises AttributeError for it -- confirm intent.
    urlparts = urlsplit(args.path, scheme='http')
    host = args.server if args.server else urlparts.netloc
    # BUG FIX: urlsplit yields '' (never None) for a missing netloc, so the
    # old 'host is None' test could not fire.
    if not host:
        print('Hatrac server name required')
        return

    if args.server:
        # BUG FIX: str.replace returns a new string; the result was discarded.
        args.path = args.path.replace('/hatrac', '')
        if not args.path.startswith('/'):
            args.path = '/' + args.path
        args.path = '/hatrac' + args.path
    elif args.path == '/hatrac':      # Missing trailing slash
        args.path = '/hatrac/'

    credential = get_credential(host)
    args.catalog = HatracStore(urlparts.scheme, host, credentials=credential)

    args.func(args)
# Example #19
# 0
def main():
    """Dump catalog annotations (or the list of annotation tags with -l).

    :return: 0 on success, 1 when a required argument is missing.
    """
    cli = ConfigBaseCLI("annotation dump tool", None, version=MY_VERSION)
    cli.parser.add_argument('-l', help="list tags encountered", action="store_true")
    args = cli.parse_cli()
    if args.config_file is None:
        print("No config file specified")
        return 1
    if args.host is None:
        print("No host specified")
        return 1
    # BUG FIX: the config file was opened via json.load(open(...)) and the
    # handle was never closed; use a context manager.
    with open(args.config_file) as f:
        config = json.load(f)
    credentials = get_credential(args.host, args.credential_file)
    annotations = Annotations(args.host, args.catalog, credentials, config)
    if args.l:
        for t in annotations.types_list():
            print(t)
    else:
        print(annotations.dumps())
    return 0
# Example #20
# 0
def _parse_bool(value):
    """argparse type: map the strings 'True'/'False' to real booleans."""
    if value == 'True':
        return True
    if value == 'False':
        return False
    # argparse turns ValueError from a type callable into a usage error.
    raise ValueError('expected True or False, got %r' % value)


def main():
    """Configure an ermrest catalog and/or one of its tables from the CLI."""
    parser = argparse.ArgumentParser(description="Configure an Ermrest Catalog")
    parser.add_argument('server', help='Catalog server name')
    parser.add_argument('--catalog-id', default=1, help="ID number of desired catalog (Default:1)")
    parser.add_argument('--catalog-name', default=None, help="Name of catalog (Default:hostname)")
    parser.add_argument("--catalog", action='store_true', help='Configure a catalog')
    parser.add_argument("--schema", help='Name of schema to configure')
    parser.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME',
                        help='Name of table to be configured')
    # BUG FIX: the old spec (default='True', choices=[True, False]) rejected
    # any user-typed value, because argparse compared the input *string*
    # against bool choices. Convert to bool before the choices check.
    parser.add_argument('--set-policy', default=True, choices=[True, False],
                        type=_parse_bool,
                        help='Access control policy to be applied to catalog or table')
    parser.add_argument('--reader-group', dest='reader', default=None,
                        help='Group name to use for readers. For a catalog named "foo" defaults for foo-reader')
    parser.add_argument('--writer-group', dest='writer', default=None,
                        help='Group name to use for writers. For a catalog named "foo" defaults for foo-writer')
    parser.add_argument('--curator-group', dest='curator', default=None,
                        help='Group name to use for readers. For a catalog named "foo" defaults for foo-curator')
    parser.add_argument('--admin-group', dest='admin', default=None,
                        help='Group name to use for readers. For a catalog named "foo" defaults for foo-admin')
    parser.add_argument('--publish', default=False, action='store_true',
                        help='Make the catalog or table accessible for reading without logging in')

    args = parser.parse_args()

    credentials = get_credential(args.server)
    catalog = ErmrestCatalog('https', args.server, args.catalog_id, credentials=credentials)

    try:
        if args.catalog:
            print('Configuring catalog {}:{}'.format(args.server, args.catalog_id))
            configure_baseline_catalog(catalog, catalog_name=args.catalog_name,
                                       reader=args.reader, writer=args.writer, curator=args.curator, admin=args.admin,
                                       set_policy=args.set_policy, anonymous=args.publish)
        if args.table:
            [schema_name, table_name] = args.table.split(':')
            table = catalog.getCatalogModel().schemas[schema_name].tables[table_name]
            configure_table_defaults(catalog, table, set_policy=args.set_policy, anonymous=args.publish)
    except DerivaConfigError as e:
        print(e.msg)
    return
def main():
    """CLI to attach an asset table to an existing table in an ermrest catalog."""
    parser = argparse.ArgumentParser(description="Configure an Ermrest Catalog")
    parser.add_argument('server', help='Catalog server name')
    parser.add_argument('--catalog-id', default=1,
                        help="ID number of desired catalog (Default:1)")
    parser.add_argument('--table', default=None, metavar='SCHEMA_NAME:TABLE_NAME',
                        help='Name of table to be configured')
    parser.add_argument('--asset-table', default=None, metavar='KEY_COLUMN',
                        help='Create an asset table linked to table on key_column')
    parser.add_argument('--config', default=None,
                        help='python script to set up configuration variables)')

    args = parser.parse_args()

    credentials = get_credential(args.server)
    catalog = ErmrestCatalog('https', args.server, args.catalog_id,
                             credentials=credentials)

    if args.table:
        schema_name, table_name = args.table.split(':')
        table = catalog.getCatalogModel().schemas[schema_name].tables[table_name]
    if args.asset_table:
        # An asset table can only be attached to an explicitly named table.
        if not args.table:
            print('Creating asset table requires specification of a table')
            exit(1)
        create_asset_table(catalog, table, args.asset_table)
# Example #22
# 0
    def process(self):
        """Resolve the configured target URL and attach credentials for its host.

        :return: the processor's accumulated outputs (self.outputs).
        :raises DerivaDownloadConfigurationError: if 'target_url' is missing.
        """
        target_url_param = "target_url"
        target_url = self.parameters.get(target_url_param)
        if not target_url:
            raise DerivaDownloadConfigurationError(
                "%s is missing required parameter '%s' from %s" %
                (self.__class__.__name__, target_url_param,
                 PROCESSOR_PARAMS_KEY))
        # Substitute {var} placeholders in the URL from the environment vars.
        if self.envars:
            target_url = target_url.format(**self.envars)
        target_url = target_url.strip(" ")
        upr = urlsplit(target_url, "https")
        self.scheme = upr.scheme.lower()
        self.netloc = upr.netloc
        self.path = upr.path.strip("/")
        host = urlunsplit((self.scheme, upr.netloc, "", "", ""))
        creds = get_credential(host)
        if not creds:
            logging.info("Unable to locate credential entry for: %s" % host)
        # Missing credentials degrade to anonymous access rather than failing.
        self.credentials = creds or dict()

        return self.outputs
# Example #23
# 0
def init_variables(catalog_num=1):
    """Populate the interactive ``__main__`` namespace with catalog handles.

    Binds the ERMrest catalog, its model, model-level table objects, and
    DataPath table objects as attributes of ``__main__`` so they can be
    used directly from an interactive session.

    :param catalog_num: ERMrest catalog number to bind (default 1).
    :return: None (all results are attached to ``__main__``).
    """
    host = 'pbcconsortium.isrd.isi.edu'
    creds = get_credential(host)
    catalog = ErmrestCatalog('https', host, catalog_num, credentials=creds)
    model = catalog.getCatalogModel()

    __main__.catalog = catalog
    __main__.model_root = model

    # Model-level handles, used when manipulating the schema itself.
    for tname in ('Experiment', 'Specimen', 'Biosample', 'Dataset'):
        setattr(__main__, tname, model.table('Beta_Cell', tname))
    __main__.imaging_data = model.table('isa', 'imaging_data')
    __main__.model = model.table('viz', 'model')

    # DataPath handles, used when managing table contents.
    pb = catalog.getPathBuilder()
    isa = pb.isa
    viz = pb.viz
    vocab = pb.vocab
    beta_cell = pb.Beta_Cell

    __main__.pb = pb
    __main__.isa = isa
    __main__.vocab = vocab

    __main__.Experiment_dp = beta_cell.Experiment
    __main__.Biosample_dp = beta_cell.Biosample
    __main__.dataset_dp = beta_cell.Dataset
    __main__.XRay_Tomography_dp = beta_cell.XRay_Tomography_Data
    __main__.Specimen_dp = beta_cell.Specimen
    __main__.model_dp = viz.model
# ---- Example #24 (snippet separator; extraction artifact kept as comment) ----
    def __init__(self,
                 scheme='https',
                 servername='app.nih-cfde.org',
                 catalog='registry',
                 credentials=None,
                 session_config=None):
        """Bind this client to the specified registry catalog.

        The binding operates as an authenticated client identity, so the
        capabilities exposed may differ depending on the client's role
        within the organization.
        """
        # Fall back to locally stored credentials when none are supplied.
        credentials = get_credential(servername) if credentials is None else credentials
        # Start from the default session config when none (or an empty one)
        # is given; retries on all methods are always enabled.
        session_config = session_config or DEFAULT_SESSION_CONFIG.copy()
        session_config["allow_retry_on_all_methods"] = True
        self._catalog = ErmrestCatalog(scheme, servername, catalog, credentials,
                                       session_config=session_config)
        self._builder = self._catalog.getPathBuilder()
# ---- Example #25 (snippet separator; extraction artifact kept as comment) ----
from deriva.core import HatracStore, ErmrestCatalog, get_credential, DerivaPathError
import deriva.core.ermrest_model as em
import argparse

# Command-line interface: three required positional arguments.
parser = argparse.ArgumentParser()
for positional in ('hostname', 'catalog_number', 'schema_name'):
    parser.add_argument(positional)
args = parser.parse_args()

hostname = args.hostname
catalog_number = args.catalog_number
schema_name = args.schema_name

# Vocabulary table to create, with an (empty) table comment.
term_table = 'Step_Type'
term_comment = ''

# Bind to the target catalog with locally stored credentials.
credential = get_credential(hostname)
catalog = ErmrestCatalog('https', hostname, catalog_number, credentials=credential)

def create_vocabulary_table(catalog, term_table, term_comment, schema=None):
    """Create a vocabulary table in the target schema of the catalog.

    :param catalog: ErmrestCatalog in which to create the table.
    :param term_table: name of the new vocabulary table.
    :param term_comment: comment string attached to the new table.
    :param schema: schema name to create the table in; defaults to the
        module-level ``schema_name`` parsed from the command line, which
        preserves the original (implicit-global) behavior.
    :return: the newly created table model object.
    """
    if schema is None:
        # Backward compatible: the original version read this global directly.
        schema = schema_name
    model_root = catalog.getCatalogModel()
    table_def = em.Table.define_vocabulary(term_table, 'CORE:{RID}',
                                           comment=term_comment)
    return model_root.schemas[schema].create_table(catalog, table_def)


create_vocabulary_table(catalog, term_table, term_comment)
# Key definitions: a single key on the system RID column.
# NOTE(review): the constraint name references schema 'isa' with a
# 'BellCellStatus' prefix while surrounding code targets other tables --
# confirm this constraint name is intentional.
key_defs = [em.Key.define(['RID'], constraint_names=[('isa', 'BellCellStatus_RIDkey1')], ), ]

# No foreign keys are defined for this table.
fkey_defs = []

# Assemble the full ERMrest table definition (system columns included via
# provide_system=True).
# NOTE(review): table_name, column_defs, table_annotations, table_acls,
# table_acl_bindings and table_comment must be defined earlier in this
# module (not visible here) -- confirm before running standalone.
table_def = em.Table.define(
    table_name,
    column_defs=column_defs,
    key_defs=key_defs,
    fkey_defs=fkey_defs,
    annotations=table_annotations,
    acls=table_acls,
    acl_bindings=table_acl_bindings,
    comment=table_comment,
    provide_system=True
)


def main(catalog, mode, replace=False):
    """Push the module-level ``table_def`` into the catalog.

    :param catalog: ErmrestCatalog to update.
    :param mode: mode string passed through to ``CatalogUpdater.update_table``.
    :param replace: passed through; replace an existing table definition.
    """
    CatalogUpdater(catalog).update_table(mode, schema_name, table_def,
                                         replace=replace)


if __name__ == "__main__":
    host = 'pbcconsortium.isrd.isi.edu'
    catalog_id = 1
    mode, replace, host, catalog_id = parse_args(host, catalog_id, is_table=True)
    credential = get_credential(host)
    catalog = ErmrestCatalog('https', host, catalog_id, credentials=credential)
    main(catalog, mode, replace)

    utils.set_foreign_key_acls(catalog, 'PDB', 'ihm_hdx_restraint',
                               'ihm_hdx_restraint_structure_id_fkey',
                               foreign_key_acls)
    utils.set_foreign_key_acls(
        catalog, 'PDB', 'ihm_derived_angle_restraint',
        'ihm_derived_angle_restraint_structure_id_fkey', foreign_key_acls)
    utils.set_foreign_key_acls(
        catalog, 'PDB', 'ihm_derived_dihedral_restraint',
        'ihm_derived_dihedral_restraint_structure_id_fkey', foreign_key_acls)
    utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref',
                               'struct_ref_structure_id_fkey',
                               foreign_key_acls)
    utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref',
                               'struct_ref_db_name_fkey', foreign_key_acls)
    utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq',
                               'struct_ref_seq_structure_id_fkey',
                               foreign_key_acls)
    utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq_dif',
                               'struct_ref_seq_dif_structure_id_fkey',
                               foreign_key_acls)
    utils.set_foreign_key_acls(catalog, 'PDB', 'struct_ref_seq_dif',
                               'struct_ref_seq_dif_details_fkey',
                               foreign_key_acls)


if __name__ == '__main__':
    # Parse standard BaseCLI options (host, credential file, ...) and run
    # against the fixed catalog number 99.
    cli_args = BaseCLI("ad-hoc table creation tool", None, 1).parse_cli()
    creds = get_credential(cli_args.host, cli_args.credential_file)

    main(cli_args.host, 99, creds)
# ---- Example #28 (snippet separator; extraction artifact kept as comment) ----
                            fkey_defs=fkey_defs,
                            annotations=table_annotations,
                            acls=table_acls,
                            acl_bindings=table_acl_bindings,
                            comment=table_comment,
                            provide_system=True)


def main(catalog, mode, replace=False, really=False):
    """Attach column documentation to ``table_def`` and push it to the catalog.

    :param catalog: ErmrestCatalog to update.
    :param mode: mode string passed through to ``CatalogUpdater.update_table``.
    :param replace: passed through; replace an existing table definition.
    :param really: passed through; semantics defined by CatalogUpdater.
    """
    # Fold the module-level column docs into the table definition before
    # handing it to the updater.
    table_def.update({
        'column_annotations': column_annotations,
        'column_comment': column_comment,
    })
    CatalogUpdater(catalog).update_table(mode, schema_name, table_def,
                                         replace=replace, really=really)


if __name__ == "__main__":
    host = 'pdb.isrd.isi.edu'
    catalog_id = 99
    mode, replace, host, catalog_id = parse_args(host,
                                                 catalog_id,
                                                 is_table=True)
    catalog = ErmrestCatalog('https',
                             host,
                             catalog_id=catalog_id,
                             credentials=get_credential(host))
    main(catalog, mode, replace)
# ---- Example #29 (snippet separator; extraction artifact kept as comment) ----
    def __init__(self, server, **kwargs):
        """Initialize a downloader bound to a DERIVA server.

        :param server: dict describing the target server; must contain
            ``host``; may contain ``protocol`` (default ``https``),
            ``catalog_id`` (default ``"1"``) and ``session`` config.
        :param kwargs: optional ``output_dir``, ``envars``, ``config``,
            ``credentials``, ``config_file``, ``credential_file``, and the
            credential inputs ``token`` / ``oauth2_token`` /
            ``username`` + ``password``.
        :raises DerivaDownloadConfigurationError: if the server dict or its
            host is missing, or the config file cannot be read.
        """
        self.server = server
        self.hostname = None
        self.catalog = None
        self.store = None
        self.cancelled = False
        self.output_dir = os.path.abspath(kwargs.get("output_dir", "."))
        self.envars = kwargs.get("envars", dict())
        self.config = kwargs.get("config")
        self.credentials = kwargs.get("credentials", dict())
        config_file = kwargs.get("config_file")
        credential_file = kwargs.get("credential_file")
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, get_installed_version(VERSION),
            platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise DerivaDownloadConfigurationError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise DerivaDownloadConfigurationError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization
        # Priority: explicit credential file, then token/username+password,
        # else whatever arrived via the "credentials" kwarg (default {}).
        token = kwargs.get("token")
        oauth2_token = kwargs.get("oauth2_token")
        username = kwargs.get("username")
        password = kwargs.get("password")
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)
        elif token or oauth2_token or (username and password):
            self.credentials = format_credential(token=token,
                                                 oauth2_token=oauth2_token,
                                                 username=username,
                                                 password=password)

        # catalog and file store initialization
        # NOTE(review): self.catalog/self.store were set to None above, so
        # these del branches look unreachable from __init__ itself --
        # possibly kept for subclasses that assign them earlier; confirm.
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(protocol,
                                      self.hostname,
                                      catalog_id,
                                      self.credentials,
                                      session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(protocol,
                                 self.hostname,
                                 self.credentials,
                                 session_config=session_config)

        # init dcctx cid to a default
        self.set_dcctx_cid(self.__class__.__name__)

        # process config file
        if config_file:
            try:
                self.config = read_config(config_file)
            except Exception as e:
                raise DerivaDownloadConfigurationError(e)
# ---- Example #30 (snippet separator; extraction artifact kept as comment) ----
    def download(self, **kwargs):
        """Run the configured download pipeline and return its outputs.

        Pipeline stages, driven by ``self.config``:
          1. validate/acquire a client identity,
          2. optionally initialize a bag (BDBag) target,
          3. run the configured query processors,
          4. run any transform processors,
          5. finalize (and optionally archive) the bag,
          6. run any post processors.

        :param kwargs: optional ``identity`` (pre-validated client identity)
            and ``wallet`` (credential wallet handed to processors).
        :return: dict of processor outputs; when bagging is enabled, a dict
            keyed by the bag (or archive) basename with its local path.
        :raises DerivaDownloadConfigurationError: if no configuration or no
            catalog configuration is present.
        :raises DerivaDownloadAuthenticationError: if credential validation
            fails for a reason other than a missing login session.
        """

        if not self.config:
            raise DerivaDownloadConfigurationError(
                "No configuration specified!")

        if self.config.get("catalog") is None:
            raise DerivaDownloadConfigurationError(
                "Catalog configuration error!")

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        # Unique scratch file collecting remote file references for the bag.
        remote_file_manifest = os.path.abspath(''.join([
            os.path.join(self.output_dir, 'remote-file-manifest_'),
            str(uuid.uuid4()), ".json"
        ]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))
        self.envars.update({"hostname": self.hostname})

        # 1. If we don't have a client identity, we need to authenticate
        identity = kwargs.get("identity")
        if not identity:
            try:
                if not self.credentials:
                    self.set_credentials(get_credential(self.hostname))
                logging.info("Validating credentials for host: %s" %
                             self.hostname)
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except HTTPError as he:
                # A 404 simply means no login session exists; proceed
                # anonymously rather than failing.
                if he.response.status_code == 404:
                    logging.info(
                        "No existing login session found for host: %s" %
                        self.hostname)
            except Exception as e:
                raise DerivaDownloadAuthenticationError(
                    "Unable to validate credentials: %s" % format_exception(e))
        wallet = kwargs.get("wallet", {})

        # 2. Check for bagging config and initialize bag related variables
        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get(
                'bag_name', ''.join([
                    "deriva_bag", '_',
                    time.strftime("%Y-%m-%d_%H.%M.%S")
                ])).format(**self.envars)
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get(
                'bag_metadata',
                {"Internal-Sender-Identifier": "deriva@%s" % self.server_url})
            bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
            # NOTE(review): this inner "if create_bag:" is always true here
            # (we are already inside "if create_bag:") -- likely a leftover
            # from refactoring; confirm before simplifying.
            if create_bag:
                bdb.ensure_bag_path_exists(bag_path)
                bag = bdb.make_bag(bag_path,
                                   algs=bag_algorithms,
                                   metadata=bag_metadata)
                if bag_ro:
                    ro_author_name = bag.info.get(
                        "Contact-Name", None if not identity else identity.get(
                            'full_name',
                            identity.get('display_name',
                                         identity.get('id', None))))
                    ro_author_orcid = bag.info.get("Contact-Orcid")
                    ro_manifest = ro.init_ro_manifest(
                        author_name=ro_author_name,
                        author_orcid=ro_author_orcid)
                    bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        # 3. Process the set of queries by locating, instantiating, and invoking the specified processor(s)
        outputs = dict()
        base_path = bag_path if bag_path else self.output_dir
        for processor in catalog_config['query_processors']:
            processor_name = processor["processor"]
            processor_type = processor.get('processor_type')
            processor_params = processor.get('processor_params')

            try:
                query_processor = find_query_processor(processor_name,
                                                       processor_type)
                processor = query_processor(
                    self.envars,
                    inputs=outputs,
                    bag=create_bag,
                    catalog=self.catalog,
                    store=self.store,
                    base_path=base_path,
                    processor_params=processor_params,
                    remote_file_manifest=remote_file_manifest,
                    ro_manifest=ro_manifest,
                    ro_author_name=ro_author_name,
                    ro_author_orcid=ro_author_orcid,
                    identity=identity,
                    wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                # On any query failure, discard the partially built bag.
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        # 4. Execute anything in the transform processing pipeline, if configured
        transform_processors = self.config.get('transform_processors', [])
        if transform_processors:
            for processor in transform_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    transform_processor = find_transform_processor(
                        processor_name, processor_type)
                    processor = transform_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        base_path=base_path,
                        bag=create_bag,
                        ro_manifest=ro_manifest,
                        ro_author_name=ro_author_name,
                        ro_author_orcid=ro_author_orcid,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        # 5. Create the bag, and archive (serialize) if necessary
        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                # Second make_bag call updates manifests in place.
                bdb.make_bag(
                    bag_path,
                    algs=bag_algorithms,
                    remote_file_manifest=remote_file_manifest if
                    (remote_file_manifest
                     and os.path.getsize(remote_file_manifest) > 0) else None,
                    update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s" %
                              format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                # The scratch manifest is always removed, success or failure.
                if remote_file_manifest and os.path.isfile(
                        remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    outputs = {
                        os.path.basename(archive): {
                            LOCAL_PATH_KEY: archive
                        }
                    }
                except Exception as e:
                    logging.error(
                        "Exception while creating data bag archive: %s" %
                        format_exception(e))
                    raise
            else:
                outputs = {
                    os.path.basename(bag_path): {
                        LOCAL_PATH_KEY: bag_path
                    }
                }

        # 6. Execute anything in the post processing pipeline, if configured
        post_processors = self.config.get('post_processors', [])
        if post_processors:
            for processor in post_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    post_processor = find_post_processor(
                        processor_name, processor_type)
                    processor = post_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        return outputs