def __init__(self, hostname, catalogid, scheme='https', caching=True):
    """Connect to an ERMrest catalog and keep a path builder for it.

    :param hostname: Server host name, passed through to ErmrestCatalog.
    :param catalogid: Identifier of the catalog to bind to.
    :param scheme: URL scheme handed to ErmrestCatalog (default 'https').
    :param caching: Forwarded unchanged as ErmrestCatalog's ``caching`` option.
    """
    # Start from the library defaults, then enable retries for every HTTP method.
    retrying_config = DEFAULT_SESSION_CONFIG.copy()
    retrying_config.update({"allow_retry_on_all_methods": True})

    self.catalog = ErmrestCatalog(
        scheme,
        hostname,
        catalogid,
        caching=caching,
        session_config=retrying_config,
    )
    self.builder = self.catalog.getPathBuilder()
def make_session_config():
    """Build the requests session_config used by our data submission scenarios.

    The result is a copy of DEFAULT_SESSION_CONFIG with the retry-related
    settings below overridden; DEFAULT_SESSION_CONFIG itself is not modified
    by the override step.
    """
    overrides = {
        # our PUT/POST to ermrest is idempotent, so retrying them is acceptable
        "allow_retry_on_all_methods": True,
        # do more retries before aborting
        "retry_read": 8,
        "retry_connect": 5,
        # increase delay factor * 2**(n-1) for Nth retry
        "retry_backoff_factor": 5,
    }
    config = DEFAULT_SESSION_CONFIG.copy()
    config.update(overrides)
    return config
def __init__(self, scheme='https', servername='app.nih-cfde.org', catalog='registry', credentials=None, session_config=None):
    """Bind to specified registry.

    Note: this binding operates as an authenticated client identity
    and may expose different capabilities depending on the client's
    role within the organization.

    :param scheme: URL scheme handed to ErmrestCatalog.
    :param servername: Host name of the server holding the registry.
    :param catalog: Catalog identifier of the registry.
    :param credentials: Client credentials; when None they are looked up
        with get_credential(servername).
    :param session_config: Optional session configuration mapping; when
        empty or None a copy of DEFAULT_SESSION_CONFIG is used instead.
        The mapping passed in is never modified.
    """
    if credentials is None:
        credentials = get_credential(servername)

    # Always work on a private copy: the retry flag set below must not leak
    # into a dict that the caller may go on to reuse for other connections.
    if session_config:
        session_config = dict(session_config)
    else:
        session_config = DEFAULT_SESSION_CONFIG.copy()
    session_config["allow_retry_on_all_methods"] = True

    self._catalog = ErmrestCatalog(scheme, servername, catalog, credentials, session_config=session_config)
    self._builder = self._catalog.getPathBuilder()
def configure(self, config, credential_file):
    """Apply a configuration and prepare the authentication URL and session.

    :param config: Configuration mapping. When falsy (None or empty), the
        configuration is instead read from ``self.config_file`` via
        read_config(), creating it from DEFAULT_CONFIG if needed.
    :param credential_file: Stored as ``self.credential_file``.

    If the effective configuration has no "host" entry, an error page is
    shown through set_current_html() and the method returns early without
    setting up the URL, cookies, or retry adapter.
    """
    self.config = config if config else read_config(
        self.config_file, create_default=True, default=DEFAULT_CONFIG)
    self.credential_file = credential_file

    host = self.config.get("host")
    if not host:
        self.set_current_html(ERROR_HTML % "Could not locate hostname parameter in configuration.")
        return

    # Read scheme/port from the *effective* configuration (self.config), not
    # the raw argument: the argument may be None or empty when the settings
    # were loaded from the config file above.
    self.auth_url = QUrl()
    self.auth_url.setScheme(self.config.get("protocol", "https"))
    self.auth_url.setHost(host)
    port = self.config.get("port")
    if port is not None:
        self.auth_url.setPort(port)

    self.authn_cookie_name = self.config.get("cookie_name", "webauthn")
    self.cookie_file = DEFAULT_SESSION_CONFIG.get("cookie_jar")
    self.cookie_jar = load_cookies_from_file(self.cookie_file)

    # Mount an adapter carrying the default retry policy for the auth URL prefix.
    retries = Retry(connect=DEFAULT_SESSION_CONFIG['retry_connect'],
                    read=DEFAULT_SESSION_CONFIG['retry_read'],
                    backoff_factor=DEFAULT_SESSION_CONFIG['retry_backoff_factor'],
                    status_forcelist=DEFAULT_SESSION_CONFIG['retry_status_forcelist'])
    self._session.mount(self.auth_url.toString() + '/', HTTPAdapter(max_retries=retries))
def main(servername, subcommand, catalog_id=None):
    """Run one registry maintenance sub-command against a DERIVA server.

    :param servername: The DERIVA server where the registry should reside.
    :param subcommand: Name of the maintenance action to perform (see below).
    :param catalog_id: ID of the existing registry catalog, if any.
    :return: 0 once the sub-command has completed.
    :raises TypeError: A sub-command working on an existing catalog was
        given no catalog_id.
    :raises ValueError: The sub-command name is not recognized.

    Sub-commands:

    - 'provision': Build a new registry catalog and report its ID (ignores catalog_id)
    - 'reprovision': Adjust existing registry model (requires catalog_id)
    - 'reconfigure': Re-configure existing registry (requires catalog_id)
    - 'delete': Delete an existing (test) registry (requires catalog_id)
    - 'creators-acl': Print ermrest creators ACL
    - 'dump-onboarding': Write out *.tsv files for onboarding info in registry
      (requires catalog_id)
    """
    # Sub-commands that operate on a catalog which must already exist.
    existing_catalog_commands = {
        'reconfigure',
        'delete',
        'reprovision',
        'dump-onboarding',
    }

    init_logging(logging.INFO)
    credentials = get_credential(servername)

    retry_config = DEFAULT_SESSION_CONFIG.copy()
    retry_config.update({"allow_retry_on_all_methods": True})
    server = DerivaServer('https', servername, credentials, session_config=retry_config)

    # Printing the ACL needs no catalog at all, so handle it up front.
    if subcommand == 'creators-acl':
        print(json.dumps(ermrest_creators_acl, indent=2))
        return 0

    # Obtain the catalog: create a fresh one, or connect to an existing one.
    if subcommand == 'provision':
        catalog = server.create_ermrest_catalog()
        print('Created new catalog %s' % catalog.catalog_id)
    elif subcommand not in existing_catalog_commands:
        raise ValueError('unknown subcommand %s' % subcommand)
    else:
        if catalog_id is None:
            raise TypeError(
                'missing 1 required positional argument: catalog_id')
        catalog = server.connect_ermrest(catalog_id)
        print('Connected to existing catalog %s' % catalog.catalog_id)

    configurator = RegistryConfigurator(catalog)
    package = CfdeDataPackage(registry_schema_json, configurator)
    registry = Registry('https', servername, catalog.catalog_id, credentials)
    package.set_catalog(catalog, registry)

    if subcommand == 'provision':
        # HACK: need to pre-populate ERMrest client w/ identities used in test data for submitting_user
        test_client = {
            'ID': 'https://auth.globus.org/ad02dee8-d274-11e5-b4a0-8752ee3cf7eb',
            'Display_Name': '*****@*****.**',
            'Full_Name': 'Karl Czajkowski',
            'Email': '*****@*****.**',
            'Client_Object': {},
        }
        response = catalog.post(
            '/entity/public:ERMrest_Client?onconflict=skip',
            json=[test_client],
        )
        response.raise_for_status()

        package.provision()
        package.load_data_files()
        # reconnect registry after provisioning
        registry = Registry('https', servername, catalog.catalog_id, credentials)
        package.set_catalog(catalog, registry)
        package.apply_custom_config()
    elif subcommand == 'reprovision':
        package.provision(alter=True)
        package.load_data_files(onconflict='update')
        package.apply_custom_config()
    elif subcommand == 'reconfigure':
        package.apply_custom_config()
    elif subcommand == 'delete':
        catalog.delete_ermrest_catalog(really=True)
        print('Deleted existing catalog %s' % catalog.catalog_id)
    elif subcommand == 'dump-onboarding':
        registry.dump_onboarding(package)

    return 0
def __init__(self, parent, server):
    """Build the "Server Configuration" dialog for a single server entry.

    :param parent: Parent widget. Its ``uploader`` attribute is consulted
        for an optional ``setServers`` callable (which decides whether the
        option checkboxes are enabled) and for ``config`` client settings.
    :param server: Mapping describing the server. Read keys: "host",
        "desc", "catalog_id", "session", "default", "confirm_updates",
        "cookie_persistence". NOTE: "cookie_persistence" is forced to False
        in this mapping when the client settings disallow session caching.
    """
    super(ServerDialog, self).__init__(parent)
    self.server = server
    self.session_config = self.server.get('session', DEFAULT_SESSION_CONFIG.copy())
    self.setWindowTitle("Server Configuration")
    self.setWindowFlags(self.windowFlags() & ~Qt.WindowContextHelpButtonHint)
    self.setMinimumWidth(400)

    # Top-level layout; constructing it with ``self`` installs it on the dialog.
    layout = QVBoxLayout(self)

    # --- server identity: host / description / catalog id ---
    # Created without a parent: the dialog already owns ``layout`` and this
    # layout is installed on the group box below. Passing ``self`` here only
    # made Qt emit an "already has a layout" warning.
    self.serverLayout = QVBoxLayout()
    self.serverGroupBox = QGroupBox("Server:", self)

    self.hostnameLayout = QHBoxLayout()
    self.hostnameLabel = QLabel("Host:")
    self.hostnameLayout.addWidget(self.hostnameLabel)
    self.hostnameTextBox = QLineEdit()
    self.hostnameTextBox.setText(server.get("host", ""))
    self.hostnameLayout.addWidget(self.hostnameTextBox)
    self.serverLayout.addLayout(self.hostnameLayout)

    self.descriptionLayout = QHBoxLayout()
    self.descriptionLabel = QLabel("Description:")
    self.descriptionLayout.addWidget(self.descriptionLabel)
    self.descriptionTextBox = QLineEdit()
    self.descriptionTextBox.setText(server.get("desc", ""))
    self.descriptionLayout.addWidget(self.descriptionTextBox)
    self.serverLayout.addLayout(self.descriptionLayout)

    self.catalogIDLayout = QHBoxLayout()
    self.catalogIDLabel = QLabel("Catalog ID:")
    self.catalogIDLayout.addWidget(self.catalogIDLabel)
    self.catalogIDTextBox = QLineEdit()
    self.catalogIDTextBox.setText(str(server.get("catalog_id", 1)))
    self.catalogIDLayout.addWidget(self.catalogIDTextBox)
    self.serverLayout.addLayout(self.catalogIDLayout)

    self.serverGroupBox.setLayout(self.serverLayout)
    layout.addWidget(self.serverGroupBox)

    # The stored "timeout" may be a (connect, read) pair, a single number
    # applying to both, or missing altogether; never index it blindly.
    timeout = self.session_config.get("timeout")
    if isinstance(timeout, (list, tuple)):
        connect_timeout = timeout[0] if len(timeout) > 0 else None
        io_timeout = timeout[1] if len(timeout) > 1 else None
    else:
        connect_timeout = io_timeout = timeout

    # --- connect timeout/retry settings ---
    self.serverConnectTimeoutsGroupBox = QGroupBox(
        "Connection Timeout and Retries:", self)
    self.connectLayout = QHBoxLayout()

    # connect timeout (falls back to 6 seconds when unset)
    self.connectTimeoutLabel = QLabel("Connect timeout (secs):")
    self.connectLayout.addWidget(self.connectTimeoutLabel)
    self.connectTimeoutSpinBox = QSpinBox(parent)
    self.connectTimeoutSpinBox.setRange(1, 60)
    self.connectTimeoutSpinBox.setValue(int(connect_timeout or 6))
    self.connectLayout.addWidget(self.connectTimeoutSpinBox)

    # connect retry
    self.connectRetryLabel = QLabel("Connect retries:")
    self.connectLayout.addWidget(self.connectRetryLabel)
    self.connectRetrySpinBox = QSpinBox(parent)
    self.connectRetrySpinBox.setRange(1, 60)
    self.connectRetrySpinBox.setValue(
        self.session_config.get("retry_connect", 10))
    self.connectLayout.addWidget(self.connectRetrySpinBox)

    self.serverConnectTimeoutsGroupBox.setLayout(self.connectLayout)
    layout.addWidget(self.serverConnectTimeoutsGroupBox)

    # --- io timeout/retry settings ---
    self.serverIOTimeoutsGroupBox = QGroupBox("I/O Timeout and Retries:", self)
    self.ioLayout = QHBoxLayout()

    # io timeout (falls back to 60 seconds when unset)
    self.ioTimeoutLabel = QLabel("I/O timeout (secs):")
    self.ioLayout.addWidget(self.ioTimeoutLabel)
    self.ioTimeoutSpinBox = QSpinBox(parent)
    self.ioTimeoutSpinBox.setRange(1, 600)
    self.ioTimeoutSpinBox.setValue(int(io_timeout or 60))
    self.ioLayout.addWidget(self.ioTimeoutSpinBox)

    # io retry
    self.ioRetryLabel = QLabel("I/O retries:")
    self.ioLayout.addWidget(self.ioRetryLabel)
    self.ioRetrySpinBox = QSpinBox(parent)
    self.ioRetrySpinBox.setRange(1, 60)
    self.ioRetrySpinBox.setValue(self.session_config.get("retry_read", 10))
    self.ioLayout.addWidget(self.ioRetrySpinBox)

    self.serverIOTimeoutsGroupBox.setLayout(self.ioLayout)
    layout.addWidget(self.serverIOTimeoutsGroupBox)

    # --- option checkboxes ---
    # The options are only editable when the uploader exposes setServers().
    setServers = getattr(parent.uploader, "setServers", None)
    self.serversConfigurable = callable(setServers)

    self.serverOptionsGroupBox = QGroupBox("Options:", self)
    self.checkboxLayout = QHBoxLayout()

    self.defaultServer = QCheckBox("Set as &Default", parent)
    self.defaultServer.setChecked(stob(server.get("default", False)))
    self.defaultServer.setEnabled(self.serversConfigurable)
    self.checkboxLayout.addWidget(self.defaultServer)

    self.confirm_updates = QCheckBox("&Confirm configuration updates", parent)
    self.confirm_updates.setChecked(
        stob(server.get("confirm_updates", False)))
    self.confirm_updates.setEnabled(self.serversConfigurable)
    self.checkboxLayout.addWidget(self.confirm_updates)

    self.cookie_persistence = QCheckBox("&Stay logged in", parent)
    # Client settings may forbid session caching; in that case persistence
    # is switched off in the server entry and the checkbox is disabled.
    allow_session_caching = True
    if parent.uploader.config:
        client_settings = parent.uploader.config.get("client_settings")
        if client_settings:
            allow_session_caching = stob(
                client_settings.get("allow_session_caching", True))
    if not allow_session_caching:
        server["cookie_persistence"] = False
    self.cookie_persistence.setChecked(
        stob(server.get("cookie_persistence", False)))
    self.cookie_persistence.setEnabled(self.serversConfigurable
                                       and allow_session_caching)
    self.checkboxLayout.addWidget(self.cookie_persistence)

    self.serverOptionsGroupBox.setLayout(self.checkboxLayout)
    layout.addWidget(self.serverOptionsGroupBox)

    # --- Button Box ---
    self.buttonBox = QDialogButtonBox(parent)
    self.buttonBox.setObjectName("buttonBox")
    self.buttonBox.setOrientation(Qt.Horizontal)
    self.buttonBox.setStandardButtons(QDialogButtonBox.Cancel
                                      | QDialogButtonBox.Ok)
    self.buttonBox.accepted.connect(self.accept)
    self.buttonBox.rejected.connect(self.reject)
    layout.addWidget(self.buttonBox)
def deriva_ingest(servername,
                  archive_url,
                  deriva_webauthn_user,
                  dcc_id=None,
                  globus_ep=None,
                  action_id=None):
    """Ingest a submitted archive into DERIVA and report the outcome.

    Arguments:
        servername (str): The name of the DERIVA server.
        archive_url: Location of the archive, handed to Submission.
        deriva_webauthn_user: Submitting user, used for the DCC pre-flight
            check and handed to Submission.
        dcc_id: DCC identifier validated against the registry.
        globus_ep: Globus endpoint ID used to build the HTTPS token scope.
        action_id: Globus action ID; reused as the submission ID.

    Returns:
        dict with the keys:
            status (str): "SUCCEEDED" or "FAILED".
            error: The registry's diagnostics, or False when there are none.
            message (str): Success message, or "" on failure.
            submission_id: The submission ID (same as action_id).
            submission_link: The registry's review_browse_url for the submission.
    """
    credential = {
        "bearer-token": get_app_token(CONFIG["DEPENDENT_SCOPES"]["deriva_all"])
    }

    retry_config = DEFAULT_SESSION_CONFIG.copy()
    retry_config.update({"allow_retry_on_all_methods": True})
    registry = Registry('https', servername,
                        credentials=credential,
                        session_config=retry_config)
    server = DerivaServer('https', servername, credential,
                          session_config=retry_config)

    # Reusing the Globus action_id as the Submission id lets a submission in
    # Deriva be traced back to the action that produced it.
    submission_id = action_id
    logger.info(
        f'Submitting new dataset into Deriva using submission id {submission_id}'
    )

    # Optional pre-flight check, as an action provider might want to do;
    # Submission(...) implicitly performs it again.
    registry.validate_dcc_id(dcc_id, deriva_webauthn_user)

    # The header map keeps our https_token from being sent to non-Globus URLs.
    # The host pattern MUST match, otherwise the Submission() client will
    # attempt to download the Globus GCS Auth login page instead.
    # r"https://[^/]*[.]data[.]globus[.]org/.*" matches most GCS HTTP pages,
    # but a custom domain requires updating the configured pattern.
    https_scope = f'https://auth.globus.org/scopes/{globus_ep}/https'
    https_token = get_app_token(https_scope)
    auth_header = {"Authorization": f"Bearer {https_token}"}
    header_map = {CONFIG['ALLOWED_GCS_HTTPS_HOSTS']: auth_header}

    submission = Submission(server,
                            registry,
                            submission_id,
                            dcc_id,
                            archive_url,
                            deriva_webauthn_user,
                            archive_headers_map=header_map)
    submission.ingest()

    md = registry.get_datapackage(submission_id)
    success = md["status"] == DERIVA_INGEST_SUCCESS

    # status must be a valid automate status
    # ['SUCCEEDED', 'FAILED', 'ACTIVE', 'INACTIVE']
    if success:
        status = "SUCCEEDED"
        message = "DERIVA ingest successful"
    else:
        status = "FAILED"
        message = ""

    # The automate flow expects error to be False when there are no errors,
    # so any falsy diagnostics value from the registry is mapped to False.
    error = md.get('diagnostics') or False

    return {
        "status": status,
        "error": error,
        "message": message,
        "submission_id": submission_id,
        "submission_link": md["review_browse_url"],
    }