コード例 #1
0
ファイル: test_api.py プロジェクト: sterlingbaldwin/bdbag
 def test_cleanup_bag(self):
     logger.info(self.getTestHeader('cleanup bag'))
     try:
         bdb.cleanup_bag(self.test_bag_dir)
         self.assertFalse(ospe(self.test_bag_dir), "Failed to cleanup bag directory")
     except Exception as e:
         self.fail(get_typed_exception(e))
コード例 #2
0
ファイル: test_api.py プロジェクト: sterlingbaldwin/bdbag
 def test_extract_bag_archive_tar(self):
     logger.info(self.getTestHeader('extract bag tar format'))
     try:
         bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.tar'), temp=True)
         self.assertTrue(ospe(bag_path))
         self.assertTrue(bdb.is_bag(bag_path))
         bdb.cleanup_bag(os.path.dirname(bag_path))
     except Exception as e:
         self.fail(get_typed_exception(e))
コード例 #3
0
ファイル: test_api.py プロジェクト: kylechard/bdbag
 def test_extract_bag_archive_tar(self):
     logger.info(self.getTestHeader('extract bag tar format'))
     try:
         bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.tar'), temp=True)
         self.assertTrue(ospe(bag_path))
         self.assertTrue(bdb.is_bag(bag_path))
         bdb.cleanup_bag(os.path.dirname(bag_path))
     except Exception as e:
         self.fail(bdbag.get_named_exception(e))
コード例 #4
0
ファイル: test_api.py プロジェクト: sterlingbaldwin/bdbag
 def test_extract_bag_archive_zip_with_relocate_existing(self):
     logger.info(self.getTestHeader('extract bag zip format, relocate existing'))
     try:
         bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.zip'), temp=False)
         self.assertTrue(ospe(bag_path))
         self.assertTrue(bdb.is_bag(bag_path))
         bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.zip'), temp=False)
         self.assertTrue(ospe(bag_path))
         self.assertTrue(bdb.is_bag(bag_path))
         bdb.cleanup_bag(os.path.dirname(bag_path))
         output = self.stream.getvalue()
         self.assertExpectedMessages(["moving existing directory"], output)
     except Exception as e:
         self.fail(get_typed_exception(e))
コード例 #5
0
    def download(self, **kwargs):

        if not self.config:
            raise DerivaDownloadConfigurationError(
                "No configuration specified!")

        if self.config.get("catalog") is None:
            raise DerivaDownloadConfigurationError(
                "Catalog configuration error!")

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(''.join([
            os.path.join(self.output_dir, 'remote-file-manifest_'),
            str(uuid.uuid4()), ".json"
        ]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))
        self.envars.update({"hostname": self.hostname})

        # 1. If we don't have a client identity, we need to authenticate
        identity = kwargs.get("identity")
        if not identity:
            try:
                if not self.credentials:
                    self.set_credentials(get_credential(self.hostname))
                logging.info("Validating credentials for host: %s" %
                             self.hostname)
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except HTTPError as he:
                if he.response.status_code == 404:
                    logging.info(
                        "No existing login session found for host: %s" %
                        self.hostname)
            except Exception as e:
                raise DerivaDownloadAuthenticationError(
                    "Unable to validate credentials: %s" % format_exception(e))
        wallet = kwargs.get("wallet", {})

        # 2. Check for bagging config and initialize bag related variables
        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get(
                'bag_name', ''.join([
                    "deriva_bag", '_',
                    time.strftime("%Y-%m-%d_%H.%M.%S")
                ])).format(**self.envars)
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get(
                'bag_metadata',
                {"Internal-Sender-Identifier": "deriva@%s" % self.server_url})
            bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
            if create_bag:
                bdb.ensure_bag_path_exists(bag_path)
                bag = bdb.make_bag(bag_path,
                                   algs=bag_algorithms,
                                   metadata=bag_metadata)
                if bag_ro:
                    ro_author_name = bag.info.get(
                        "Contact-Name", None if not identity else identity.get(
                            'full_name',
                            identity.get('display_name',
                                         identity.get('id', None))))
                    ro_author_orcid = bag.info.get("Contact-Orcid")
                    ro_manifest = ro.init_ro_manifest(
                        author_name=ro_author_name,
                        author_orcid=ro_author_orcid)
                    bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        # 3. Process the set of queries by locating, instantiating, and invoking the specified processor(s)
        outputs = dict()
        base_path = bag_path if bag_path else self.output_dir
        for processor in catalog_config['query_processors']:
            processor_name = processor["processor"]
            processor_type = processor.get('processor_type')
            processor_params = processor.get('processor_params')

            try:
                query_processor = find_query_processor(processor_name,
                                                       processor_type)
                processor = query_processor(
                    self.envars,
                    inputs=outputs,
                    bag=create_bag,
                    catalog=self.catalog,
                    store=self.store,
                    base_path=base_path,
                    processor_params=processor_params,
                    remote_file_manifest=remote_file_manifest,
                    ro_manifest=ro_manifest,
                    ro_author_name=ro_author_name,
                    ro_author_orcid=ro_author_orcid,
                    identity=identity,
                    wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        # 4. Execute anything in the transform processing pipeline, if configured
        transform_processors = self.config.get('transform_processors', [])
        if transform_processors:
            for processor in transform_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    transform_processor = find_transform_processor(
                        processor_name, processor_type)
                    processor = transform_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        base_path=base_path,
                        bag=create_bag,
                        ro_manifest=ro_manifest,
                        ro_author_name=ro_author_name,
                        ro_author_orcid=ro_author_orcid,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        # 5. Create the bag, and archive (serialize) if necessary
        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                bdb.make_bag(
                    bag_path,
                    algs=bag_algorithms,
                    remote_file_manifest=remote_file_manifest if
                    (remote_file_manifest
                     and os.path.getsize(remote_file_manifest) > 0) else None,
                    update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s" %
                              format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                if remote_file_manifest and os.path.isfile(
                        remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    outputs = {
                        os.path.basename(archive): {
                            LOCAL_PATH_KEY: archive
                        }
                    }
                except Exception as e:
                    logging.error(
                        "Exception while creating data bag archive: %s" %
                        format_exception(e))
                    raise
            else:
                outputs = {
                    os.path.basename(bag_path): {
                        LOCAL_PATH_KEY: bag_path
                    }
                }

        # 6. Execute anything in the post processing pipeline, if configured
        post_processors = self.config.get('post_processors', [])
        if post_processors:
            for processor in post_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    post_processor = find_post_processor(
                        processor_name, processor_type)
                    processor = post_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        return outputs
コード例 #6
0
def main():

    args, is_bag, is_file = parse_cli()
    path = os.path.abspath(args.path)

    archive = None
    temp_path = None
    error = None
    result = 0

    if not args.quiet:
        sys.stderr.write('\n')

    try:
        if not is_file:
            # do not try to create or update the bag if the user just wants to validate or complete an existing bag
            if not (
                (args.validate or args.validate_profile or args.resolve_fetch)
                    and not (args.update and bdb.is_bag(path))):
                if args.checksum and 'all' in args.checksum:
                    args.checksum = ['md5', 'sha1', 'sha256', 'sha512']
                # create or update the bag depending on the input arguments
                bdb.make_bag(path,
                             algs=args.checksum,
                             update=args.update,
                             save_manifests=not args.skip_manifests,
                             prune_manifests=args.prune_manifests,
                             metadata=BAG_METADATA if BAG_METADATA else None,
                             metadata_file=args.metadata_file,
                             remote_file_manifest=args.remote_file_manifest,
                             config_file=args.config_file,
                             ro_metadata_file=args.ro_metadata_file)

        # otherwise just extract the bag if it is an archive and no other conflicting options specified
        elif not (args.validate or args.validate_profile
                  or args.resolve_fetch):
            bdb.extract_bag(path)
            if not args.quiet:
                sys.stderr.write('\n')
            return result

        if args.ro_manifest_generate:
            bdb.generate_ro_manifest(
                path,
                True if args.ro_manifest_generate == "overwrite" else False,
                config_file=args.config_file)

        if args.resolve_fetch:
            if args.validate == 'full':
                sys.stderr.write(ASYNC_TRANSFER_VALIDATION_WARNING)
            bdb.resolve_fetch(
                path,
                force=True if args.resolve_fetch == 'all' else False,
                keychain_file=args.keychain_file,
                config_file=args.config_file,
                filter_expr=args.fetch_filter)

        if args.validate:
            if is_file:
                temp_path = bdb.extract_bag(path, temp=True)
            if args.validate == 'structure':
                bdb.validate_bag_structure(temp_path if temp_path else path)
            else:
                bdb.validate_bag(
                    temp_path if temp_path else path,
                    fast=True if args.validate == 'fast' else False,
                    config_file=args.config_file)

        if args.archiver:
            archive = bdb.archive_bag(path, args.archiver)

        if archive is None and is_file:
            archive = path

        if args.validate_profile:
            if is_file:
                if not temp_path:
                    temp_path = bdb.extract_bag(path, temp=True)
            profile = bdb.validate_bag_profile(
                temp_path if temp_path else path)
            bdb.validate_bag_serialization(archive if archive else path,
                                           profile)

        if args.revert:
            bdb.revert_bag(path)

    except Exception as e:
        result = 1
        error = "Error: %s" % get_typed_exception(e)

    finally:
        if temp_path:
            bdb.cleanup_bag(os.path.dirname(temp_path))
        if result != 0:
            sys.stderr.write("\n%s" % error)

    if not args.quiet:
        sys.stderr.write('\n')

    return result
コード例 #7
0
ファイル: deriva_download.py プロジェクト: ytfqj/deriva-py
    def download(self, identity=None):

        if not self.config:
            raise RuntimeError("No configuration specified!")

        if self.config.get("catalog") is None:
            raise RuntimeError("Catalog configuration error!")

        if not identity:
            logging.info("Validating credentials")
            try:
                if not self.credentials:
                    self.setCredentials(get_credential(self.hostname))
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except Exception as e:
                raise RuntimeError("Unable to validate credentials: %s" % format_exception(e))

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(
            ''.join([os.path.join(self.output_dir, 'remote-file-manifest_'), str(uuid.uuid4()), ".json"]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))

        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get('bag_name', ''.join(["deriva_bag", '_', time.strftime("%Y-%m-%d_%H.%M.%S")]))
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
                                                           "deriva@%s" % self.server_url})
            bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
            if create_bag:
                bdb.ensure_bag_path_exists(bag_path)
                bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata)
                if bag_ro:
                    ro_author_name = bag.info.get("Contact-Name",
                                                  identity.get('full_name',
                                                               identity.get('display_name',
                                                                            identity.get('id', None))))
                    ro_author_orcid = bag.info.get("Contact-Orcid")
                    ro_manifest = ro.init_ro_manifest(author_name=ro_author_name, author_orcid=ro_author_orcid)
                    bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        file_list = list()
        base_path = bag_path if bag_path else self.output_dir
        for query in catalog_config['queries']:
            query_path = query['query_path']
            output_format = query['output_format']
            output_processor = query.get("output_format_processor")
            format_args = query.get('output_format_params', None)
            output_path = query.get('output_path', '')

            try:
                download_processor = findProcessor(output_format, output_processor)
                processor = download_processor(self.envars,
                                               bag=create_bag,
                                               catalog=self.catalog,
                                               store=self.store,
                                               query=query_path,
                                               base_path=base_path,
                                               sub_path=output_path,
                                               format_args=format_args,
                                               remote_file_manifest=remote_file_manifest,
                                               ro_manifest=ro_manifest,
                                               ro_author_name=ro_author_name,
                                               ro_author_orcid=ro_author_orcid)
                file_list.extend(processor.process())
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                bdb.make_bag(bag_path, algs=bag_algorithms, remote_file_manifest=remote_file_manifest, update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s", format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                if remote_file_manifest and os.path.isfile(remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    return [archive]
                except Exception as e:
                    logging.error("Exception while creating data bag archive:", format_exception(e))
                    raise
            else:
                return [bag_path]

        return file_list
コード例 #8
0
 def cleanup_bag(bag_path):
     bdbag_api.cleanup_bag(os.path.dirname(bag_path))
コード例 #9
0
ファイル: bdbag_cli.py プロジェクト: kylechard/bdbag
def main():

    sys.stderr.write('\n')

    args, is_bag, is_file = parse_cli()
    path = os.path.abspath(args.bag_path)

    archive = None
    temp_path = None
    error = None
    result = 0

    try:
        if not is_file:
            # do not try to create or update the bag if the user just wants to validate or complete an existing bag
            if not ((args.validate or args.validate_profile or args.resolve_fetch) and
                    not (args.update and bdb.is_bag(path))):
                if args.checksum and 'all' in args.checksum:
                    args.checksum = ['md5', 'sha1', 'sha256', 'sha512']
                # create or update the bag depending on the input arguments
                bdb.make_bag(path,
                             args.checksum,
                             args.update,
                             args.skip_manifests,
                             args.prune_manifests,
                             BAG_METADATA if BAG_METADATA else None,
                             args.metadata_file,
                             args.remote_file_manifest,
                             args.config_file)

        # otherwise just extract the bag if it is an archive and no other conflicting options specified
        elif not (args.validate or args.validate_profile or args.resolve_fetch):
            bdb.extract_bag(path)
            sys.stderr.write('\n')
            return result

        if args.resolve_fetch:
            if args.validate == 'full':
                sys.stderr.write(ASYNC_TRANSFER_VALIDATION_WARNING)
            bdb.resolve_fetch(path, True if args.resolve_fetch == 'all' else False)

        if args.validate:
            if is_file:
                temp_path = bdb.extract_bag(path, temp=True)
            bdb.validate_bag(temp_path if temp_path else path,
                             True if args.validate == 'fast' else False,
                             args.config_file)

        if args.archiver:
            archive = bdb.archive_bag(path, args.archiver)

        if archive is None and is_file:
            archive = path

        if args.validate_profile:
            if is_file:
                if not temp_path:
                    temp_path = bdb.extract_bag(path, temp=True)
            profile = bdb.validate_bag_profile(temp_path if temp_path else path)
            bdb.validate_bag_serialization(archive if archive else path, profile)

    except Exception as e:
        result = 1
        error = "Error: %s" % bdbag.get_named_exception(e)

    finally:
        if temp_path:
            bdb.cleanup_bag(os.path.dirname(temp_path))
        if result != 0:
            sys.stderr.write("\n%s" % error)

    sys.stderr.write('\n')

    return result