Example #1
0
    def _uploadAsset(self,
                     file_path,
                     asset_mapping,
                     match_groupdict,
                     callback=None):
        """Upload one asset file to Hatrac and reconcile its catalog record.

        :param file_path: path of the local file to upload.
        :param asset_mapping: mapping configuration (dict-like) for this asset; the keys read here are
            "create_record_before_upload", "hatrac_options" and "column_map".
        :param match_groupdict: passed through to ``_queryFileMetadata`` (presumably the named groups
            captured when the file was matched against the mapping -- confirm with the caller).
        :param callback: optional callback forwarded unchanged to ``_hatracUpload``.
        """
        # 1. Populate metadata by querying the catalog
        self._queryFileMetadata(file_path, asset_mapping, match_groupdict)

        # 2. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
        #    one if necessary. Otherwise delay this logic until after the file upload.
        record = None
        if stob(asset_mapping.get("create_record_before_upload", False)):
            record = self._getFileRecord(asset_mapping)

        # 3. Perform the Hatrac upload
        self._getFileHatracMetadata(asset_mapping)
        hatrac_options = asset_mapping.get("hatrac_options", {})
        versioned_uri = \
            self._hatracUpload(self.metadata["URI"],
                               file_path,
                               md5=self.metadata.get("md5_base64"),
                               sha256=self.metadata.get("sha256_base64"),
                               content_type=self.guessContentType(file_path),
                               content_disposition=self.metadata.get("content-disposition"),
                               chunked=True,
                               create_parents=stob(hatrac_options.get("create_parents", True)),
                               allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
                               callback=callback)
        logging.debug("Hatrac upload successful. Result object URI: %s" %
                      versioned_uri)
        if stob(hatrac_options.get("versioned_uris", True)):
            self.metadata["URI"] = versioned_uri
        else:
            # Drop only the trailing ":<version>" qualifier. Splitting on every colon and keeping the
            # first piece would truncate any URI that contains an earlier colon.
            self.metadata["URI"] = versioned_uri.rsplit(":", 1)[0]
        self.metadata["URI_urlencoded"] = urlquote(self.metadata["URI"])

        # 4. Check for an existing record and create a new one if necessary
        if not record:
            record = self._getFileRecord(asset_mapping)

        # 5. Update an existing record, if necessary
        column_map = asset_mapping.get("column_map", {})
        updated_record = self.interpolateDict(self.metadata, column_map)
        if updated_record != record:
            logging.info("Updating catalog for file [%s]" %
                         self.getFileDisplayName(file_path))
            self._catalogRecordUpdate(self.metadata['target_table'], record,
                                      updated_record)
    def onUpdateConfigResult(self, success, status, detail, result):
        """Handle the result of a configuration update check.

        On failure the UI is reset with ``status`` and ``detail``. When ``result`` is non-empty it is
        written to the deployed configuration file (after an optional Yes/No confirmation controlled by
        the server's "confirm_updates" setting), the uploader is re-initialized, and a rescan is
        triggered if the version check passes.

        :param success: truthy when the update check succeeded.
        :param status: status text passed to ``resetUI`` on failure.
        :param detail: detail text passed to ``resetUI`` on failure.
        :param result: the updated configuration; nothing is applied when it is falsy.
        """
        self.restoreCursor()
        if not success:
            self.resetUI(status, detail)
        if not (success and result):
            return

        if stob(self.uploader.server.get("confirm_updates", False)):
            explanation = (
                "Selecting \"Yes\" will apply the latest configuration from the server and overwrite the existing "
                "default configuration file.\n\nSelecting \"No\" will ignore these updates and continue to use the "
                "existing configuration.\n\nYou should always apply the latest configuration changes from the server "
                "unless you understand the risk involved with using a potentially out-of-date configuration.")
            prompt = QMessageBox()
            prompt.setIcon(QMessageBox.Information)
            prompt.setWindowTitle("Updated Configuration Available")
            prompt.setText("Apply updated configuration?")
            prompt.setInformativeText(explanation)
            prompt.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
            # The user declined: keep using the existing configuration.
            if prompt.exec_() == QMessageBox.No:
                return

        write_config(self.uploader.getDeployedConfigFilePath(), result)
        self.uploader.initialize(cleanup=False)
        if self.checkVersion():
            self.on_actionRescan_triggered()
Example #3
0
    def POST(self):
        """Handle a bag export request and respond with the resulting URL(s).

        The request body is decoded as JSON and passed to ``export`` as the export configuration; the
        optional "public" query parameter is forwarded too. If the metadata of the first export output
        carries an identifier landing page, or failing that an identifier, the matching resolver URLs
        are listed ahead of the service URL and ``set_location_header`` is passed as True.

        :return: the value returned by ``self.create_response``.
        """
        auth_required = stob(self.config.get("require_authentication", True))
        if auth_required:
            self.check_authenticated()

        purge_output_dirs(self.config.get("dir_auto_purge_threshold", 5))
        key, output_dir = create_output_dir()

        # Service URL for this export: <home><path>/<key>, without doubling the slash
        separator = "" if web.ctx.path.endswith("/") else '/'
        url = web.ctx.home + web.ctx.path + separator + key

        query_params = self.parse_querystr(web.ctx.query)
        is_public = stob(query_params.get("public", False))

        # perform the export
        export_args = dict(
            config=json.loads(web.data().decode()),
            base_dir=output_dir,
            service_url=url,
            public=is_public,
            quiet=stob(self.config.get("quiet_logging", False)),
            propagate_logs=stob(self.config.get("propagate_logs", True)),
            require_authentication=auth_required,
            allow_anonymous_download=stob(
                self.config.get("allow_anonymous_download", False)),
            max_payload_size_mb=self.config.get("max_payload_size_mb"),
            timeout=self.config.get("timeout_secs"),
            dcctx_cid="export/bag",
            request_ip=get_client_ip())
        output = export(**export_args)
        first_output_metadata = list(output.values())[0] or {}

        # Prefer an explicit landing page, then well-known identifier resolvers, then the plain URL
        landing_page = first_output_metadata.get("identifier_landing_page")
        if landing_page:
            return self.create_response([landing_page, url], True)

        identifier = first_output_metadata.get("identifier")
        if identifier:
            resolver_urls = [
                "https://identifiers.org/" + identifier,
                "https://n2t.net/" + identifier,
                url,
            ]
            return self.create_response(resolver_urls, True)

        return self.create_response(url, False)
Example #4
0
    def POST(self):
        """Handle a files-only export request and respond with one URI per exported file.

        The request body is decoded as JSON and passed to ``export`` with ``files_only=True``. For each
        output, the first remote path is used when one is recorded; otherwise the URI is the service
        URL followed by the output's file path. ``set_location_header`` is True unless there is more
        than one output.

        :return: the value returned by ``self.create_response``.
        """
        auth_required = stob(self.config.get("require_authentication", True))
        if auth_required:
            self.check_authenticated()

        purge_output_dirs(self.config.get("dir_auto_purge_threshold", 5))
        key, output_dir = create_output_dir()

        # Service URL for this export: <home><path>/<key>, without doubling the slash
        separator = "" if web.ctx.path.endswith("/") else '/'
        url = web.ctx.home + web.ctx.path + separator + key

        query_params = self.parse_querystr(web.ctx.query)
        is_public = stob(query_params.get("public", False))

        # perform the export
        export_args = dict(
            config=json.loads(web.data().decode()),
            base_dir=output_dir,
            service_url=url,
            files_only=True,
            public=is_public,
            quiet=stob(self.config.get("quiet_logging", False)),
            propagate_logs=stob(self.config.get("propagate_logs", True)),
            require_authentication=auth_required,
            allow_anonymous_download=stob(
                self.config.get("allow_anonymous_download", False)),
            max_payload_size_mb=self.config.get("max_payload_size_mb"),
            timeout=self.config.get("timeout_secs"),
            dcctx_cid="export/file")
        output = export(**export_args)

        uri_list = []
        single_output = len(output.keys()) <= 1
        for name, metadata in output.items():
            remote_paths = metadata.get(GenericDownloader.REMOTE_PATHS_KEY)
            uri_list.append(
                remote_paths[0] if remote_paths else url + str('/%s' % name))

        return self.create_response(uri_list, single_output)
 def __init__(self, envars=None, **kwargs):
     """Initialize transform processor state from processor parameters and keyword arguments.

     :param envars: forwarded to the base class constructor.
     :param kwargs: must contain "base_path"; "bag" is optional. The "ro_manifest",
         "ro_author_name" and "ro_author_orcid" values are read from ``self.kwargs``.
     :raises KeyError: if "base_path" is missing, or if neither a non-empty "input_paths" nor
         "input_path" is present in the processor parameters.
     """
     super(BaseTransformProcessor, self).__init__(envars, **kwargs)
     self.base_path = kwargs["base_path"]

     params = self.parameters
     # "input_paths" is the preferred form; a single "input_path" is accepted for backward compatibility
     self.input_paths = params.get("input_paths", []) or [params["input_path"]]
     self.sub_path = params.get("output_path", "")
     self.is_bag = kwargs.get("bag", False)

     # NOTE(review): self.input_path is not assigned in this constructor; presumably it is provided by
     # the base class -- confirm, otherwise this lookup raises AttributeError.
     self.transformed_output = self.outputs.get(self.input_path, {})
     self.url = self.transformed_output.get(SOURCE_URL_KEY)

     # File provenance defaults to on for bags and off otherwise, unless configured explicitly
     self.ro_file_provenance = stob(
         params.get("ro_file_provenance", bool(self.is_bag)))
     self.ro_manifest = self.kwargs.get("ro_manifest")
     self.ro_author_name = self.kwargs.get("ro_author_name")
     self.ro_author_orcid = self.kwargs.get("ro_author_orcid")
     self.delete_input = stob(params.get("delete_input", True))

     self.input_relpaths = []
     self.input_abspaths = []
     self.output_relpath = self.output_abspath = None
 def checkAllowSessionCaching(self):
     """Turn off cookie persistence for the current server when session caching is disallowed.

     Does nothing when the configuration has no "client_settings", when session caching is allowed,
     or when the server's "cookie_persistence" value already equals the setting. Otherwise the
     current server entry is updated, moved to the end of the server list, and the list is handed to
     ``self.uploader.setServers`` if the uploader provides such a callable.
     """
     settings = self.uploader.config.get("client_settings")
     if not settings:
         return

     caching_allowed = stob(settings.get("allow_session_caching", True))
     persistence = self.uploader.server.get("cookie_persistence", False)
     if persistence != caching_allowed and not caching_allowed:
         self.uploader.server["cookie_persistence"] = False
         # Rebuild the list with every other host first, then the updated current server
         servers = [
             entry for entry in self.uploader.getServers()
             if entry.get("host", "") != self.uploader.server.get("host")
         ]
         servers.append(self.uploader.server)
         setServers = getattr(self.uploader, "setServers", None)
         if callable(setServers):
             setServers(servers)
Example #7
0
 def __init__(self, envars=None, **kwargs):
     """Initialize query processor state from processor parameters and keyword arguments.

     :param envars: forwarded to the base class constructor; when ``self.envars`` is non-empty it is
         used to format the "query_path" parameter.
     :param kwargs: must contain "catalog", "store" and "base_path"; "store_base", "bag" and
         "sessions" are optional.
     :raises KeyError: if a required keyword argument or the "query_path" parameter is missing.
     """
     super(BaseQueryProcessor, self).__init__(envars, **kwargs)
     self.catalog = kwargs["catalog"]
     self.store = kwargs["store"]
     self.base_path = kwargs["base_path"]

     # Substitute environment variables into the query path only when some are defined
     query_template = self.parameters["query_path"]
     self.query = query_template.format(**self.envars) if self.envars else query_template

     self.sub_path = self.parameters.get("output_path", "")
     self.store_base = kwargs.get("store_base", "/hatrac/")
     self.is_bag = kwargs.get("bag", False)
     self.sessions = kwargs.get("sessions", {})
     self.content_type = "application/octet-stream"
     self.url = self.catalog.get_server_uri() + self.query

     # File provenance defaults to on for bags and off otherwise, unless configured explicitly
     self.ro_file_provenance = stob(
         self.parameters.get("ro_file_provenance", bool(self.is_bag)))
     self.ro_manifest = self.kwargs.get("ro_manifest")
     self.ro_author_name = self.kwargs.get("ro_author_name")
     self.ro_author_orcid = self.kwargs.get("ro_author_orcid")
     self.output_relpath = self.output_abspath = None
    def process(self):
        """Register every output with the minid service and record the result on the output.

        The minid client configuration file comes from the "minid_client_config" parameter, falling
        back to the client's default. For each output, file values are computed via
        ``make_file_output_values`` and the registration result is stored under ``IDENTIFIER_KEY``.

        :return: ``self.outputs``.
        :raises DerivaDownloadConfigurationError: if an output has no remote locations.
        """
        minid_api = self.MINID.minid_client_api
        config = minid_api.parse_config(
            self.parameters.get("minid_client_config", minid_api.DEFAULT_CONFIG_FILE))
        server = config.get("minid_server", "http://minid.bd2k.org/minid")
        email = config.get("email", self.identity.get("email"))
        code = config.get("code")

        for entry in self.outputs.values():
            local_path = entry[LOCAL_PATH_KEY]
            self.make_file_output_values(local_path, entry)
            checksum = entry[SHA256_KEY][0]
            locations = entry.get(REMOTE_PATHS_KEY)
            if not locations:
                raise DerivaDownloadConfigurationError(
                    "Invalid URLs: One or more location URLs must be specified when registering an identifier.")
            entry[IDENTIFIER_KEY] = minid_api.register_entity(
                server,
                checksum,
                email,
                code,
                url=locations,
                title=self.parameters.get("title", ""),
                test=stob(self.parameters.get("test", "False")),
                globus_auth_token=None,
                checksum_function=None)

        return self.outputs
    def process(self):
        """Create an identifier for every output and record it on the output.

        The namespace is the test namespace when the "test" parameter is truthy, otherwise the regular
        one. For each output the identifier is stored under ``IDENTIFIER_KEY`` and its landing page
        (service prefix + identifier) under ``IDENTIFIER_LANDING_PAGE``.

        :return: ``self.outputs``.
        :raises DerivaDownloadConfigurationError: if an output has no remote locations.
        :raises DerivaDownloadError: if the identifier client reports an error.
        """
        client = self.load_identifier_client()
        if stob(self.parameters.get("test", "False")):
            namespace = self.TEST_IDENTIFIER_NAMESPACE
        else:
            namespace = self.IDENTIFIER_NAMESPACE

        for name, entry in self.outputs.items():
            local_path = entry[LOCAL_PATH_KEY]
            self.make_file_output_values(local_path, entry)
            checksum = entry[SHA256_KEY][0]
            metadata = {"title": self.parameters.get("title", "DERIVA Export: %s" % name)}
            visible_to = self.parameters.get("visible_to", ["public"])
            locations = entry.get(REMOTE_PATHS_KEY)
            if not locations:
                raise DerivaDownloadConfigurationError(
                    "Invalid URLs: One or more location URLs must be specified when registering an identifier.")

            try:
                logging.info("Attempting to create identifier for file [%s] with locations: %s" %
                             (local_path, locations))
                minid = client.create_identifier(
                    namespace=namespace,
                    visible_to=visible_to,
                    location=locations,
                    checksums=[{"function": "sha256", "value": checksum}],
                    metadata=metadata)
                identifier = minid["identifier"]
                entry[IDENTIFIER_KEY] = identifier
                entry[IDENTIFIER_LANDING_PAGE] = self.GLOBUS_IDENTIFIER_SERVICE + identifier
            except self.GLOBUS_IDENTIFIER_CLIENT.identifiers_api.IdentifierClientError as e:
                raise DerivaDownloadError("Unable to create identifier: %s" % e.message)

        return self.outputs
Example #10
0
    def download(self, **kwargs):
        """Run the configured export pipeline and return a description of its outputs.

        Stages, in order: resolve the client identity, initialize the bag (if a "bag" section is
        configured), run the catalog query processors, run the transform processors, finalize and
        optionally archive the bag, then run the post processors.

        :param kwargs: optional "identity" (client identity dict; looked up from the current
            session when absent) and "wallet" (dict forwarded to every processor).
        :return: the outputs dict returned by the last processor that ran; when a bag is created the
            dict passed to the post processors has a single entry for the bag directory or archive.
        :raises DerivaDownloadConfigurationError: if there is no configuration or no "catalog" section.
        :raises DerivaDownloadAuthenticationError: if credential validation fails with anything other
            than an HTTP error.
        """
        if not self.config:
            raise DerivaDownloadConfigurationError(
                "No configuration specified!")

        if self.config.get("catalog") is None:
            raise DerivaDownloadConfigurationError(
                "Catalog configuration error!")

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(''.join([
            os.path.join(self.output_dir, 'remote-file-manifest_'),
            str(uuid.uuid4()), ".json"
        ]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))
        self.envars.update({"hostname": self.hostname})

        # 1. If we don't have a client identity, we need to authenticate
        identity = kwargs.get("identity")
        if not identity:
            try:
                if not self.credentials:
                    self.set_credentials(get_credential(self.hostname))
                logging.info("Validating credentials for host: %s" %
                             self.hostname)
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except HTTPError as he:
                if he.response.status_code == 404:
                    logging.info(
                        "No existing login session found for host: %s" %
                        self.hostname)
                else:
                    # Still continue without an identity (best effort), but do not hide the failure
                    logging.warning(
                        "Unable to validate credentials for host %s: %s" %
                        (self.hostname, format_exception(he)))
            except Exception as e:
                raise DerivaDownloadAuthenticationError(
                    "Unable to validate credentials: %s" % format_exception(e))
        wallet = kwargs.get("wallet", {})

        # 2. Check for bagging config and initialize bag related variables
        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get(
                'bag_name', ''.join([
                    "deriva_bag", '_',
                    time.strftime("%Y-%m-%d_%H.%M.%S")
                ])).format(**self.envars)
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get(
                'bag_metadata',
                {"Internal-Sender-Identifier": "deriva@%s" % self.server_url})
            bag_ro = stob(bag_config.get('bag_ro', "True"))
            bdb.ensure_bag_path_exists(bag_path)
            bag = bdb.make_bag(bag_path,
                               algs=bag_algorithms,
                               metadata=bag_metadata)
            if bag_ro:
                # Fall back from the bag's Contact-Name to the best available identity attribute
                default_author = None
                if identity:
                    default_author = identity.get(
                        'full_name',
                        identity.get('display_name',
                                     identity.get('id', None)))
                ro_author_name = bag.info.get("Contact-Name", default_author)
                ro_author_orcid = bag.info.get("Contact-Orcid")
                ro_manifest = ro.init_ro_manifest(
                    author_name=ro_author_name,
                    author_orcid=ro_author_orcid)
                bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        # 3. Process the set of queries by locating, instantiating, and invoking the specified processor(s)
        outputs = dict()
        base_path = bag_path if bag_path else self.output_dir
        for processor_config in catalog_config['query_processors']:
            processor_name = processor_config["processor"]
            processor_type = processor_config.get('processor_type')
            processor_params = processor_config.get('processor_params')

            try:
                query_processor = find_query_processor(processor_name,
                                                       processor_type)
                processor = query_processor(
                    self.envars,
                    inputs=outputs,
                    bag=create_bag,
                    catalog=self.catalog,
                    store=self.store,
                    base_path=base_path,
                    processor_params=processor_params,
                    remote_file_manifest=remote_file_manifest,
                    ro_manifest=ro_manifest,
                    ro_author_name=ro_author_name,
                    ro_author_orcid=ro_author_orcid,
                    identity=identity,
                    wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        # 4. Execute anything in the transform processing pipeline, if configured
        transform_processors = self.config.get('transform_processors', [])
        if transform_processors:
            for processor_config in transform_processors:
                processor_name = processor_config["processor"]
                processor_type = processor_config.get('processor_type')
                processor_params = processor_config.get('processor_params')
                try:
                    transform_processor = find_transform_processor(
                        processor_name, processor_type)
                    processor = transform_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        base_path=base_path,
                        bag=create_bag,
                        ro_manifest=ro_manifest,
                        ro_author_name=ro_author_name,
                        ro_author_orcid=ro_author_orcid,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        # 5. Create the bag, and archive (serialize) if necessary
        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                # Only hand a remote file manifest to bdbag when it exists and is non-empty
                bdb.make_bag(
                    bag_path,
                    algs=bag_algorithms,
                    remote_file_manifest=remote_file_manifest if
                    (remote_file_manifest
                     and os.path.getsize(remote_file_manifest) > 0) else None,
                    update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s" %
                              format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                # The manifest is a temporary file; remove it whether or not the update succeeded
                if remote_file_manifest and os.path.isfile(
                        remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    outputs = {
                        os.path.basename(archive): {
                            LOCAL_PATH_KEY: archive
                        }
                    }
                except Exception as e:
                    logging.error(
                        "Exception while creating data bag archive: %s" %
                        format_exception(e))
                    raise
            else:
                outputs = {
                    os.path.basename(bag_path): {
                        LOCAL_PATH_KEY: bag_path
                    }
                }

        # 6. Execute anything in the post processing pipeline, if configured
        post_processors = self.config.get('post_processors', [])
        if post_processors:
            for processor_config in post_processors:
                processor_name = processor_config["processor"]
                processor_type = processor_config.get('processor_type')
                processor_params = processor_config.get('processor_params')
                try:
                    post_processor = find_post_processor(
                        processor_name, processor_type)
                    processor = post_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        return outputs
Example #11
0
    def process(self):
        """Upload every output file to the target bucket and record its remote path.

        A Boto3 session is created from the "profile" and "region" parameters; if "role_arn" is set,
        temporary credentials are obtained through STS and replace the configured ones. The target
        bucket (``self.netloc``) must already exist. Each output is stored under
        ``<self.path>/<qualifier>/<output name>``, where the qualifier is the basename of the
        identity id (or a random "anon-..." value), with a timestamp appended unless the "overwrite"
        parameter is truthy.

        :return: ``self.outputs``, with the remote path appended to each output's remote path list.
        :raises DerivaDownloadConfigurationError: if the Boto3 session cannot be created.
        :raises RuntimeError: if temporary credentials cannot be obtained for "role_arn".
        :raises DerivaDownloadError: if the storage client cannot be created, the bucket cannot be
            queried or does not exist, or an upload fails.
        """
        super(Boto3UploadPostProcessor, self).process()
        key = self.credentials.get("key")
        secret = self.credentials.get("secret")
        token = self.credentials.get("token")
        role_arn = self.parameters.get("role_arn")
        profile_name = self.parameters.get("profile")
        region_name = self.parameters.get("region")
        try:
            session = self.BOTO3.session.Session(profile_name=profile_name,
                                                 region_name=region_name)
        except Exception as e:
            raise DerivaDownloadConfigurationError(
                "Unable to create Boto3 session: %s" % format_exception(e))

        if role_arn:
            try:
                sts = session.client('sts')
                response = sts.assume_role(RoleArn=role_arn,
                                           RoleSessionName='DERIVA-Export',
                                           DurationSeconds=3600)
                temp_credentials = response['Credentials']
                key = temp_credentials['AccessKeyId']
                secret = temp_credentials['SecretAccessKey']
                token = temp_credentials['SessionToken']
            except Exception as e:
                raise RuntimeError(
                    "Unable to get temporary credentials using arn [%s]. %s" %
                    (role_arn, get_typed_exception(e)))

        try:
            if self.scheme == "gs":
                endpoint_url = "https://storage.googleapis.com"
                config = self.BOTO3.session.Config(signature_version="s3v4")
                kwargs = {
                    "aws_access_key_id": key,
                    "aws_secret_access_key": secret,
                    "endpoint_url": endpoint_url,
                    "config": config
                }
            else:
                kwargs = {
                    "aws_access_key_id": key,
                    "aws_secret_access_key": secret
                }
                if token:
                    kwargs.update({"aws_session_token": token})

            s3_client = session.client("s3", **kwargs)
            # A second client with request signing disabled, used below for public-read URLs
            kwargs["config"] = self.BOTO3.session.Config(
                signature_version=self.BOTOCORE.UNSIGNED)
            s3_client_unsigned = self.BOTO3.client('s3', **kwargs)
        except Exception as e:
            raise DerivaDownloadError(
                "Unable to create Boto3 storage client: %s" %
                format_exception(e))

        bucket_name = self.netloc
        bucket_exists = True
        try:
            s3_client.head_bucket(Bucket=bucket_name)
        except self.BOTOCORE.exceptions.ClientError as e:
            # A "not found" client error means the bucket does not exist. The code is compared as a
            # string because it is not always numeric (e.g. "NoSuchBucket"); converting it with int()
            # would raise ValueError from inside this handler. Other client errors are ignored here,
            # as before.
            error_code = str(e.response.get("Error", {}).get("Code", ""))
            if error_code in ("404", "NoSuchBucket"):
                bucket_exists = False
        except Exception as e:
            raise DerivaDownloadError("Unable to query target bucket: %s" %
                                      format_exception(e))

        if not bucket_exists:
            raise DerivaDownloadError("Target bucket [%s] does not exist." %
                                      bucket_name)

        object_qualifier = os.path.basename(self.identity.get(
            "id", "")) or "anon-" + str(uuid.uuid4())
        if not stob(self.parameters.get("overwrite", "False")):
            # Without overwrite, a timestamp component keeps successive uploads apart
            object_qualifier = "/".join([
                object_qualifier,
                datetime.strftime(datetime.now(), "%Y-%m-%d_%H.%M.%S")
            ])

        for k, v in self.outputs.items():
            object_name = "/".join([self.path, object_qualifier, k])
            file_path = v[LOCAL_PATH_KEY]
            acl = self.parameters.get("acl", "private")
            # Presigned URLs default to on only for public-read objects
            signed_url = stob(
                self.parameters.get("signed_url", acl == "public-read"))
            if signed_url:
                client = s3_client_unsigned if acl == "public-read" else s3_client
                remote_path = client.generate_presigned_url('get_object',
                                                            Params={
                                                                'Bucket':
                                                                bucket_name,
                                                                'Key':
                                                                object_name
                                                            })
            else:
                remote_path = urlunsplit(
                    (self.scheme, self.netloc, object_name, "", ""))
            logging.info("Uploading file [%s] to: %s" %
                         (file_path, remote_path))
            remote_paths = v.get(REMOTE_PATHS_KEY, list())
            remote_paths.append(remote_path)
            v[REMOTE_PATHS_KEY] = remote_paths
            self.make_file_output_values(file_path, v)
            with open(file_path, "rb") as input_file:
                try:
                    # NOTE(review): v[MD5_KEY] is indexed as a pair; presumably [0] is the hex digest
                    # and [1] the base64 digest -- confirm against make_file_output_values.
                    response = s3_client.put_object(
                        ACL=acl,
                        Bucket=bucket_name,
                        Key=object_name,
                        Body=input_file,
                        ContentType=v[CONTENT_TYPE_KEY],
                        ContentLength=v[FILE_SIZE_KEY],
                        ContentMD5=v[MD5_KEY][1],
                        Metadata={"Content-MD5": v[MD5_KEY][0]})
                except Exception as e:
                    raise DerivaDownloadError(
                        "Upload of %s failed: %s" %
                        (remote_path, format_exception(e)))

        return self.outputs
Example #12
0
    def download(self, identity=None):
        """Run the configured catalog queries and return the resulting file paths.

        When a "bag" section is configured, the query results are written into a bag which is then
        finalized and, if "bag_archiver" is set, archived.

        :param identity: client identity dict; when falsy it is looked up from the current session.
        :return: a list: ``[archive]`` when the bag was archived, ``[bag_path]`` when a bag was created
            without archiving, otherwise the files reported by the query processors.
        :raises RuntimeError: if there is no configuration, no "catalog" section, or the credentials
            cannot be validated.
        """
        if not self.config:
            raise RuntimeError("No configuration specified!")

        if self.config.get("catalog") is None:
            raise RuntimeError("Catalog configuration error!")

        if not identity:
            logging.info("Validating credentials")
            try:
                if not self.credentials:
                    self.setCredentials(get_credential(self.hostname))
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except Exception as e:
                raise RuntimeError("Unable to validate credentials: %s" % format_exception(e))

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(
            ''.join([os.path.join(self.output_dir, 'remote-file-manifest_'), str(uuid.uuid4()), ".json"]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))

        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get('bag_name', ''.join(["deriva_bag", '_', time.strftime("%Y-%m-%d_%H.%M.%S")]))
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
                                                           "deriva@%s" % self.server_url})
            bag_ro = stob(bag_config.get('bag_ro', "True"))
            bdb.ensure_bag_path_exists(bag_path)
            bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata)
            if bag_ro:
                # Fall back from the bag's Contact-Name to the best available identity attribute
                ro_author_name = bag.info.get("Contact-Name",
                                              identity.get('full_name',
                                                           identity.get('display_name',
                                                                        identity.get('id', None))))
                ro_author_orcid = bag.info.get("Contact-Orcid")
                ro_manifest = ro.init_ro_manifest(author_name=ro_author_name, author_orcid=ro_author_orcid)
                bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        file_list = list()
        base_path = bag_path if bag_path else self.output_dir
        for query in catalog_config['queries']:
            query_path = query['query_path']
            output_format = query['output_format']
            output_processor = query.get("output_format_processor")
            format_args = query.get('output_format_params', None)
            output_path = query.get('output_path', '')

            try:
                download_processor = findProcessor(output_format, output_processor)
                processor = download_processor(self.envars,
                                               bag=create_bag,
                                               catalog=self.catalog,
                                               store=self.store,
                                               query=query_path,
                                               base_path=base_path,
                                               sub_path=output_path,
                                               format_args=format_args,
                                               remote_file_manifest=remote_file_manifest,
                                               ro_manifest=ro_manifest,
                                               ro_author_name=ro_author_name,
                                               ro_author_orcid=ro_author_orcid)
                file_list.extend(processor.process())
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                bdb.make_bag(bag_path, algs=bag_algorithms, remote_file_manifest=remote_file_manifest, update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s", format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                # The manifest is a temporary file; remove it whether or not the update succeeded
                if remote_file_manifest and os.path.isfile(remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    return [archive]
                except Exception as e:
                    # The message needs a %s placeholder: passing an argument without one makes the
                    # logging call itself fail and the exception detail is lost.
                    logging.error("Exception while creating data bag archive: %s", format_exception(e))
                    raise
            else:
                return [bag_path]

        return file_list
Example #13
0
    def __init__(self, parent, server):
        """Build the "Server Configuration" dialog for a single server entry.

        :param parent: parent widget. It must expose an ``uploader``
            attribute; that object is checked for a callable ``setServers``
            and its ``config`` mapping is consulted for ``client_settings``.
        :param server: dict-like server entry. Keys read here: ``session``,
            ``host``, ``desc``, ``catalog_id``, ``default``,
            ``confirm_updates`` and ``cookie_persistence``. When the client
            settings disable session caching, ``cookie_persistence`` is
            overwritten with ``False`` on this object.
        """
        super(ServerDialog, self).__init__(parent)
        self.server = server
        self.session_config = self.server.get('session',
                                              DEFAULT_SESSION_CONFIG.copy())
        self.setWindowTitle("Server Configuration")
        self.setWindowFlags(self.windowFlags()
                            & ~Qt.WindowContextHelpButtonHint)
        self.setMinimumWidth(400)
        layout = QVBoxLayout(self)

        # Created without a parent: the dialog already owns `layout` (a widget
        # can hold only one layout), and this one is installed on the group
        # box below via setLayout().
        self.serverLayout = QVBoxLayout()
        self.serverGroupBox = QGroupBox("Server:", self)
        self.hostnameLayout = QHBoxLayout()
        self.hostnameLabel = QLabel("Host:")
        self.hostnameLayout.addWidget(self.hostnameLabel)
        self.hostnameTextBox = QLineEdit()
        self.hostnameTextBox.setText(server.get("host", ""))
        self.hostnameLayout.addWidget(self.hostnameTextBox)
        self.serverLayout.addLayout(self.hostnameLayout)

        self.descriptionLayout = QHBoxLayout()
        self.descriptionLabel = QLabel("Description:")
        self.descriptionLayout.addWidget(self.descriptionLabel)
        self.descriptionTextBox = QLineEdit()
        self.descriptionTextBox.setText(server.get("desc", ""))
        self.descriptionLayout.addWidget(self.descriptionTextBox)
        self.serverLayout.addLayout(self.descriptionLayout)

        self.catalogIDLayout = QHBoxLayout()
        self.catalogIDLabel = QLabel("Catalog ID:")
        self.catalogIDLayout.addWidget(self.catalogIDLabel)
        self.catalogIDTextBox = QLineEdit()
        self.catalogIDTextBox.setText(str(server.get("catalog_id", 1)))
        self.catalogIDLayout.addWidget(self.catalogIDTextBox)
        self.serverLayout.addLayout(self.catalogIDLayout)
        self.serverGroupBox.setLayout(self.serverLayout)
        layout.addWidget(self.serverGroupBox)

        # "timeout" is indexed as a (connect, I/O) pair. Fetch it once and
        # tolerate a session config that omits it (or sets it to None/empty)
        # so the spin boxes fall back to their defaults instead of raising.
        timeouts = self.session_config.get("timeout") or (None, None)

        # connect timeout/retry settings
        self.serverConnectTimeoutsGroupBox = QGroupBox(
            "Connection Timeout and Retries:", self)
        self.connectLayout = QHBoxLayout()
        # connect timeout
        self.connectTimeoutLabel = QLabel("Connect timeout (secs):")
        self.connectLayout.addWidget(self.connectTimeoutLabel)
        self.connectTimeoutSpinBox = QSpinBox(parent)
        self.connectTimeoutSpinBox.setRange(1, 60)
        self.connectTimeoutSpinBox.setValue(timeouts[0] or 6)
        self.connectLayout.addWidget(self.connectTimeoutSpinBox)
        # connect retry
        self.connectRetryLabel = QLabel("Connect retries:")
        self.connectLayout.addWidget(self.connectRetryLabel)
        self.connectRetrySpinBox = QSpinBox(parent)
        self.connectRetrySpinBox.setRange(1, 60)
        self.connectRetrySpinBox.setValue(
            self.session_config.get("retry_connect", 10))
        self.connectLayout.addWidget(self.connectRetrySpinBox)
        self.serverConnectTimeoutsGroupBox.setLayout(self.connectLayout)
        layout.addWidget(self.serverConnectTimeoutsGroupBox)

        # io timeout/retry settings
        self.serverIOTimeoutsGroupBox = QGroupBox("I/O Timeout and Retries:",
                                                  self)
        self.ioLayout = QHBoxLayout()
        # io timeout
        self.ioTimeoutLabel = QLabel("I/O timeout (secs):")
        self.ioLayout.addWidget(self.ioTimeoutLabel)
        self.ioTimeoutSpinBox = QSpinBox(parent)
        self.ioTimeoutSpinBox.setRange(1, 600)
        self.ioTimeoutSpinBox.setValue(timeouts[1] or 60)
        self.ioLayout.addWidget(self.ioTimeoutSpinBox)
        # io retry
        self.ioRetryLabel = QLabel("I/O retries:")
        self.ioLayout.addWidget(self.ioRetryLabel)
        self.ioRetrySpinBox = QSpinBox(parent)
        self.ioRetrySpinBox.setRange(1, 60)
        self.ioRetrySpinBox.setValue(self.session_config.get("retry_read", 10))
        self.ioLayout.addWidget(self.ioRetrySpinBox)
        self.serverIOTimeoutsGroupBox.setLayout(self.ioLayout)
        layout.addWidget(self.serverIOTimeoutsGroupBox)

        # The option check boxes are only editable when the uploader offers a
        # callable `setServers`.
        setServers = getattr(parent.uploader, "setServers", None)
        self.serversConfigurable = callable(setServers)
        self.serverOptionsGroupBox = QGroupBox("Options:", self)
        self.checkboxLayout = QHBoxLayout()
        self.defaultServer = QCheckBox("Set as &Default", parent)
        self.defaultServer.setChecked(stob(server.get("default", False)))
        self.defaultServer.setEnabled(self.serversConfigurable)
        self.checkboxLayout.addWidget(self.defaultServer)
        self.confirm_updates = QCheckBox("&Confirm configuration updates",
                                         parent)
        self.confirm_updates.setChecked(
            stob(server.get("confirm_updates", False)))
        self.confirm_updates.setEnabled(self.serversConfigurable)
        self.checkboxLayout.addWidget(self.confirm_updates)

        self.cookie_persistence = QCheckBox("&Stay logged in", parent)
        allow_session_caching = True
        if parent.uploader.config:
            client_settings = parent.uploader.config.get("client_settings")
            if client_settings:
                allow_session_caching = stob(
                    client_settings.get("allow_session_caching", True))
                if not allow_session_caching:
                    # Client settings win: force the per-server flag off on
                    # the caller's server entry and lock the check box below.
                    server["cookie_persistence"] = False
        self.cookie_persistence.setChecked(
            stob(server.get("cookie_persistence", False)))
        self.cookie_persistence.setEnabled(self.serversConfigurable
                                           and allow_session_caching)
        self.checkboxLayout.addWidget(self.cookie_persistence)
        self.serverOptionsGroupBox.setLayout(self.checkboxLayout)
        layout.addWidget(self.serverOptionsGroupBox)

        # Button Box
        self.buttonBox = QDialogButtonBox(parent)
        self.buttonBox.setObjectName("buttonBox")
        self.buttonBox.setOrientation(Qt.Horizontal)
        self.buttonBox.setStandardButtons(QDialogButtonBox.Cancel
                                          | QDialogButtonBox.Ok)
        self.buttonBox.accepted.connect(self.accept)
        self.buttonBox.rejected.connect(self.reject)
        layout.addWidget(self.buttonBox)
Example #14
0
    def __init__(self, parent, server):
        """Build the "Server Configuration" dialog for a single server entry.

        :param parent: parent widget. It must expose an ``uploader``
            attribute, which is checked for a callable ``setServers``.
        :param server: dict-like server entry. Keys read here: ``host``,
            ``desc``, ``catalog_id``, ``default`` and ``confirm_updates``.
        """
        super(ServerDialog, self).__init__(parent)
        self.server = server
        self.setWindowTitle("Server Configuration")
        self.setWindowFlags(self.windowFlags()
                            & ~Qt.WindowContextHelpButtonHint)
        self.setMinimumWidth(400)
        layout = QVBoxLayout(self)

        # Created without a parent: the dialog already owns `layout` (a widget
        # can hold only one layout), and this one is installed on the group
        # box below via setLayout().
        self.serverLayout = QVBoxLayout()
        self.serverGroupBox = QGroupBox("Server:", self)
        self.hostnameLayout = QHBoxLayout()
        self.hostnameLabel = QLabel("Host:")
        self.hostnameLayout.addWidget(self.hostnameLabel)
        self.hostnameTextBox = QLineEdit()
        self.hostnameTextBox.setText(server.get("host", ""))
        self.hostnameLayout.addWidget(self.hostnameTextBox)
        self.serverLayout.addLayout(self.hostnameLayout)

        self.descriptionLayout = QHBoxLayout()
        self.descriptionLabel = QLabel("Description:")
        self.descriptionLayout.addWidget(self.descriptionLabel)
        self.descriptionTextBox = QLineEdit()
        self.descriptionTextBox.setText(server.get("desc", ""))
        self.descriptionLayout.addWidget(self.descriptionTextBox)
        self.serverLayout.addLayout(self.descriptionLayout)

        self.catalogIDLayout = QHBoxLayout()
        self.catalogIDLabel = QLabel("Catalog ID:")
        self.catalogIDLayout.addWidget(self.catalogIDLabel)
        self.catalogIDTextBox = QLineEdit()
        self.catalogIDTextBox.setText(str(server.get("catalog_id", 1)))
        self.catalogIDLayout.addWidget(self.catalogIDTextBox)
        self.serverLayout.addLayout(self.catalogIDLayout)
        self.serverGroupBox.setLayout(self.serverLayout)
        layout.addWidget(self.serverGroupBox)

        # The option check boxes are only editable when the uploader offers a
        # callable `setServers`.
        setServers = getattr(parent.uploader, "setServers", None)
        self.serversConfigurable = callable(setServers)
        self.serverOptionsGroupBox = QGroupBox("Options:", self)
        self.checkboxLayout = QHBoxLayout()
        self.defaultServer = QCheckBox("Set as &Default", parent)
        self.defaultServer.setChecked(stob(server.get("default", False)))
        self.defaultServer.setEnabled(self.serversConfigurable)
        self.checkboxLayout.addWidget(self.defaultServer)
        self.confirm_updates = QCheckBox("&Confirm configuration updates",
                                         parent)
        self.confirm_updates.setChecked(
            stob(server.get("confirm_updates", False)))
        self.confirm_updates.setEnabled(self.serversConfigurable)
        self.checkboxLayout.addWidget(self.confirm_updates)
        self.serverOptionsGroupBox.setLayout(self.checkboxLayout)
        layout.addWidget(self.serverOptionsGroupBox)

        # Button Box
        self.buttonBox = QDialogButtonBox(parent)
        self.buttonBox.setObjectName("buttonBox")
        self.buttonBox.setOrientation(Qt.Horizontal)
        self.buttonBox.setStandardButtons(QDialogButtonBox.Cancel
                                          | QDialogButtonBox.Ok)
        self.buttonBox.accepted.connect(self.accept)
        self.buttonBox.rejected.connect(self.reject)
        layout.addWidget(self.buttonBox)