コード例 #1
0
ファイル: storagebroker.py プロジェクト: jotelha/dtool-ecs
    def list_dataset_uris(cls, base_uri, config_path):
        """Return list containing URIs with base URI.

        Opens a session against the ECS endpoint configured for the bucket
        named by the network location of ``base_uri``, iterates over the
        objects returned for the key prefix ``dtool`` and keeps the URI of
        every dataset whose storage broker reports admin metadata.

        :param base_uri: base URI; its network location is the bucket name
        :param config_path: path to the dtool config file, handed to every
            storage broker instantiated while listing
        :returns: list of dataset URIs
        """
        # The bucket name is needed for both the config lookups and the
        # bucket handle, so parse the base URI exactly once.
        bucket_name = generous_parse_uri(base_uri).netloc

        ecs_endpoint = get_config_value(
            "DTOOL_ECS_ENDPOINT_{}".format(bucket_name))
        ecs_access_key_id = get_config_value(
            "DTOOL_ECS_ACCESS_KEY_ID_{}".format(bucket_name))
        ecs_secret_access_key = get_config_value(
            "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(bucket_name))

        session = Session(aws_access_key_id=ecs_access_key_id,
                          aws_secret_access_key=ecs_secret_access_key)

        resource = session.resource('s3',
                                    endpoint_url=ecs_endpoint,
                                    config=BOTO3_CONFIG)

        bucket = resource.Bucket(bucket_name)

        uri_list = []
        for obj in bucket.objects.filter(Prefix='dtool').all():
            # A key that matches the prefix but contains no '-' carries no
            # UUID part; skip it instead of aborting the whole listing with
            # an IndexError.
            _, separator, uuid = obj.key.partition('-')
            if not separator:
                continue

            uri = cls.generate_uri(None, uuid, base_uri)

            storage_broker = cls(uri, config_path)
            if storage_broker.has_admin_metadata():
                uri_list.append(uri)

        return uri_list
コード例 #2
0
ファイル: storagebroker.py プロジェクト: jotelha/dtool-ecs
    def __init__(self, uri, config_path=None):
        """Set up an ECS storage broker for the dataset at ``uri``.

        The bucket name is taken from the network location of ``uri`` and
        the dataset UUID from its path with the first character removed.

        :param uri: dataset URI
        :param config_path: path to the dtool config file, used for the
            cache directory lookup
        :raises: RuntimeError if the endpoint, the access key id or the
            secret access key for the bucket is not configured
        """

        parse_result = generous_parse_uri(uri)

        self.bucket = parse_result.netloc
        self.uuid = parse_result.path[1:]

        ecs_endpoint = get_config_value("DTOOL_ECS_ENDPOINT_{}".format(
            self.bucket))
        ecs_access_key_id = get_config_value(
            "DTOOL_ECS_ACCESS_KEY_ID_{}".format(self.bucket))
        ecs_secret_access_key = get_config_value(
            "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(self.bucket))

        if not ecs_endpoint:
            raise RuntimeError(
                "No ECS endpoint specified for bucket '{bucket}', "
                "please set DTOOL_ECS_ENDPOINT_{bucket}.".format(
                    bucket=self.bucket))
        if not ecs_access_key_id:
            raise RuntimeError(
                "No ECS access key id specified for bucket '{bucket}', "
                "please set DTOOL_ECS_ACCESS_KEY_ID_{bucket}.".format(
                    bucket=self.bucket))
        if not ecs_secret_access_key:
            raise RuntimeError(
                "No ECS secret access key specified for bucket '{bucket}', "
                "please set DTOOL_ECS_SECRET_ACCESS_KEY_{bucket}.".format(
                    bucket=self.bucket))

        session = Session(aws_access_key_id=ecs_access_key_id,
                          aws_secret_access_key=ecs_secret_access_key)

        self.s3resource = session.resource('s3',
                                           endpoint_url=ecs_endpoint,
                                           config=BOTO3_CONFIG)
        self.s3client = session.client('s3',
                                       endpoint_url=ecs_endpoint,
                                       config=BOTO3_CONFIG)

        # Work on a private copy: the registration key written below is
        # specific to this dataset, and storing it in the shared
        # module-level mapping would change it for every other broker
        # instance as well.
        self._structure_parameters = dict(_ECS_STRUCTURE_PARAMETERS)
        self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
        self._structure_parameters[
            "dataset_registration_key"] = self.dataset_registration_key  # NOQA

        self.data_key_prefix = self._generate_key_prefix("data_key_infix")
        self.fragments_key_prefix = self._generate_key_prefix(
            "fragment_key_infix")
        self.overlays_key_prefix = self._generate_key_prefix(
            "overlays_key_infix")
        self.annotations_key_prefix = self._generate_key_prefix(
            "annotations_key_infix")

        self.http_manifest_key = self._generate_key("http_manifest_key")

        self._s3_cache_abspath = get_config_value("DTOOL_CACHE_DIRECTORY",
                                                  config_path=config_path,
                                                  default=DEFAULT_CACHE_PATH)
コード例 #3
0
ファイル: storagebroker.py プロジェクト: jotelha/dtool-s3
    def __init__(self, uri, config_path=None):
        """Set up an S3 storage broker for the dataset at ``uri``.

        The bucket name is taken from the network location of ``uri`` and
        the dataset UUID from its path with the first character removed.

        :param uri: dataset URI
        :param config_path: path to the dtool config file, used for the
            cache directory lookup
        """
        parse_result = generous_parse_uri(uri)
        self.bucket = parse_result.netloc

        self.dataset_prefix = get_config_value("DTOOL_S3_DATASET_PREFIX")
        self.uuid = parse_result.path[1:]

        self.s3resource, self.s3client = \
            self._get_resource_and_client(self.bucket)

        # Work on a private copy: the registration key written below is
        # specific to this dataset, and storing it in the shared
        # module-level mapping would change it for every other broker
        # instance as well.
        self._structure_parameters = dict(_STRUCTURE_PARAMETERS)
        self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
        self._structure_parameters[
            "dataset_registration_key"] = self.dataset_registration_key  # NOQA

        self.data_key_prefix = self._generate_key_prefix("data_key_infix")
        self.fragments_key_prefix = self._generate_key_prefix(
            "fragment_key_infix")
        self.overlays_key_prefix = self._generate_key_prefix(
            "overlays_key_infix")
        self.annotations_key_prefix = self._generate_key_prefix(
            "annotations_key_infix")
        self.tags_key_prefix = self._generate_key_prefix("tags_key_infix")

        self.http_manifest_key = self._generate_key("http_manifest_key")

        self._s3_cache_abspath = get_config_value("DTOOL_CACHE_DIRECTORY",
                                                  config_path=config_path,
                                                  default=DEFAULT_CACHE_PATH)
コード例 #4
0
    def list_dataset_uris(cls, base_uri, config_path):
        """Return the URIs of the datasets found directly below base_uri.

        Every sub-directory of the base location is opened with a storage
        broker; those that report admin metadata contribute one URI.

        :param base_uri: base URI whose path is the directory to scan
        :param config_path: path to the dtool config file, handed to every
            storage broker instantiated while scanning
        :returns: list of dataset URIs
        """
        location = generous_parse_uri(base_uri).path
        if IS_WINDOWS:
            location = unix_to_windows_path(location)

        found = []
        for entry in os.listdir(location):
            candidate = os.path.join(location, entry)

            # Only directories can hold a dataset.
            if os.path.isdir(candidate):
                broker = cls(candidate, config_path)

                if broker.has_admin_metadata():
                    found.append(
                        broker.generate_uri(
                            name=entry,
                            uuid=None,
                            base_uri=base_uri
                        )
                    )

        return found
コード例 #5
0
def _sanitise_base_uri(tmp_dir):
    """Return a base URI for ``tmp_dir``.

    Off Windows the argument is returned untouched. On Windows the path
    component is converted with ``windows_to_unix_path`` and wrapped in a
    ``file://`` URI.
    """
    if not IS_WINDOWS:
        return tmp_dir

    windows_path = generous_parse_uri(tmp_dir).path
    return "file://{}".format(windows_to_unix_path(windows_path))
コード例 #6
0
    def __init__(self, uri, config_path=None):
        """Set up an Azure storage broker for the dataset at ``uri``.

        The storage account name is the network location of ``uri`` and the
        dataset UUID is its path with the first character removed.

        :param uri: dataset URI
        :param config_path: path to the dtool config file, used for the
            cache directory lookup and for obtaining the blob service
        """
        parsed = generous_parse_uri(uri)

        self.storage_account_name = parsed.netloc
        self.uuid = parsed.path[1:]

        # Each of these attributes is named after the structure key it is
        # generated from.
        for key_name in (
            "fragments_key_prefix",
            "overlays_key_prefix",
            "annotations_key_prefix",
            "tags_key_prefix",
            "http_manifest_key",
        ):
            setattr(self, key_name, self._generate_key(key_name))

        self._azure_cache_abspath = get_config_value(
            "DTOOL_CACHE_DIRECTORY",
            config_path=config_path,
            default=DEFAULT_CACHE_PATH
        )

        self._blobservice = get_blob_service(
            self.storage_account_name,
            config_path
        )
コード例 #7
0
ファイル: storagebroker.py プロジェクト: wgnoehring/dtool-smb
    def generate_uri(cls, name, uuid, base_uri):
        """Return the dataset URI built from ``base_uri`` and ``uuid``.

        The scheme and network location of ``base_uri`` are kept and the
        path is replaced by the dataset UUID.

        :param name: dataset name (not used to build the URI)
        :param uuid: dataset UUID, becomes the path of the returned URI
        :param base_uri: base URI, must use the ``smb`` scheme
        :returns: dataset URI as a string
        """
        # Give each component its own name: unpacking all trailing fields
        # into ``_`` would leave only the fragment bound, which would then
        # be emitted as params, query and fragment alike.
        scheme, netloc, _path, params, query, fragment = \
            generous_parse_uri(base_uri)
        assert scheme == 'smb'

        # Force path (third component of tuple) to be the dataset UUID
        uri = urlunparse((scheme, netloc, uuid, params, query, fragment))

        return uri
コード例 #8
0
def _get_abspath_from_uri(uri):
    """Return the absolute filesystem path for the path component of uri.

    On Windows the path is first converted with ``unix_to_windows_path``.
    The input and the result are written to the debug log.
    """
    logger.debug("In _get_abspath_from_uri")
    logger.debug("_get_abspath_from_uri.input_uri: {}".format(uri))

    uri_path = generous_parse_uri(uri).path
    abspath = os.path.abspath(
        unix_to_windows_path(uri_path) if IS_WINDOWS else uri_path
    )

    logger.debug("_get_abspath_from_uri.return: {}".format(abspath))
    return abspath
コード例 #9
0
def test_generous_parse_uri():
    """Check the components generous_parse_uri returns for sample URIs."""

    from dtoolcore.utils import generous_parse_uri

    # (uri, scheme, netloc, path); ``None`` marks a component that is not
    # checked for that URI.
    cases = [
        ("s3://my-bucket/path/to/files",
         's3', 'my-bucket', '/path/to/files'),
        (".my_dataset",
         'file', None, None),
        ("file://localhost/path/to/files",
         'file', 'localhost', '/path/to/files'),
        ("irods:///jic_raw_data/rg-someone/my_dataset",
         'irods', '', '/jic_raw_data/rg-someone/my_dataset'),
        ("irods:/jic_raw_data/rg-someone/my_dataset",
         'irods', '', '/jic_raw_data/rg-someone/my_dataset'),
    ]

    for uri, scheme, netloc, path in cases:
        parse_result = generous_parse_uri(uri)

        assert parse_result.scheme == scheme
        if netloc is not None:
            assert parse_result.netloc == netloc
        if path is not None:
            assert parse_result.path == path
コード例 #10
0
 def generate_uri(cls, name, uuid, base_uri):
     """Return the URI of the dataset ``name`` located below ``base_uri``.

     The dataset path is the absolute form of the base path joined with
     ``name``. On Windows the result carries no host part and the path is
     converted with ``windows_to_unix_path``; elsewhere the local hostname
     is used as the network location. ``uuid`` is not used.
     """
     logger.debug("In DiskStorageBroker.generate_uri...")

     root = generous_parse_uri(base_uri).path
     if IS_WINDOWS:
         root = unix_to_windows_path(root)

     location = os.path.abspath(os.path.join(root, name))

     if not IS_WINDOWS:
         return "{}://{}{}".format(
             cls.key,
             socket.gethostname(),
             location
         )

     location = windows_to_unix_path(location)
     return "{}:///{}".format(cls.key, location)
コード例 #11
0
ファイル: storagebroker.py プロジェクト: jotelha/dtool-s3
    def list_dataset_uris(cls, base_uri, config_path):
        """Return list containing URIs with base URI.

        Iterates over the objects returned for the key prefix ``dtool`` in
        the bucket named by the network location of ``base_uri`` and keeps
        the URI of every dataset whose storage broker reports admin
        metadata.

        :param base_uri: base URI; its network location is the bucket name
        :param config_path: path to the dtool config file, handed to every
            storage broker instantiated while listing
        :returns: list of dataset URIs
        """
        uri_list = []

        parse_result = generous_parse_uri(base_uri)
        bucket_name = parse_result.netloc
        resource, _ = cls._get_resource_and_client(bucket_name)
        bucket = resource.Bucket(bucket_name)

        for obj in bucket.objects.filter(Prefix='dtool').all():
            # A key that matches the prefix but contains no '-' carries no
            # UUID part; skip it instead of aborting the whole listing with
            # an IndexError.
            _, separator, uuid = obj.key.partition('-')
            if not separator:
                continue

            uri = cls.generate_uri(None, uuid, base_uri)

            storage_broker = cls(uri, config_path)
            if storage_broker.has_admin_metadata():
                uri_list.append(uri)

        return uri_list
コード例 #12
0
    def __init__(self, uri, config_path=None):
        """Set up an SMB storage broker for the dataset at ``uri``.

        The configuration name is the network location of ``uri`` and the
        dataset UUID is its path with the first character removed. A
        connection is obtained from ``SMBStorageBroker._connect``.

        :param uri: dataset URI
        :param config_path: path to the dtool config file, used for the
            connection settings and the cache directory lookup
        """
        parsed = generous_parse_uri(uri)

        self.config_name = parsed.netloc
        self.uuid = parsed.path[1:]

        # Connect to SMB server.
        connection = SMBStorageBroker._connect(uri, config_path)
        self.conn, self.service_name, self.path = connection

        # Paths of the individual parts of the dataset, each generated from
        # its structure key.
        for attribute, structure_key in (
            ("_data_path", "data_directory"),
            ("_overlays_path", "overlays_directory"),
            ("_annotations_path", "annotations_directory"),
            ("_tags_path", "tags_directory"),
            ("_metadata_fragments_path", "metadata_fragments_directory"),
        ):
            setattr(self, attribute, self._generate_path(structure_key))

        # Define some essential directories to be created.
        dtool_directory = self._generate_path("dtool_directory")
        self._essential_subdirectories = [
            dtool_directory,
            self._data_path,
            self._overlays_path,
            self._annotations_path,
            self._tags_path,
        ]

        # Cache for file hashes computed on upload
        self._hash_cache = dict()

        self._smb_cache_abspath = get_config_value(
            "DTOOL_CACHE_DIRECTORY",
            config_path=config_path,
            default=DEFAULT_CACHE_PATH
        )
コード例 #13
0
    def list_dataset_uris(cls, base_uri, config_path):
        """Return list containing URIs with base URI.

        Lists the containers of the storage account named by the network
        location of ``base_uri`` and builds one URI per container whose
        metadata holds both a ``name`` and a ``uuid`` entry.

        :param base_uri: base URI; its network location is the storage
            account name
        :param config_path: path to the dtool config file, used to obtain
            the blob service
        :returns: list of dataset URIs
        """

        storage_account_name = generous_parse_uri(base_uri).netloc
        blobservice = get_blob_service(storage_account_name, config_path)
        containers = blobservice.list_containers(include_metadata=True)

        uri_list = []
        for c in containers:
            admin_metadata = c.metadata

            # Ignore containers without metadata (empty or missing).
            if not admin_metadata:
                continue

            # A container may carry metadata without being a dataset; one
            # such container must not abort the whole listing with a
            # KeyError.
            if 'name' not in admin_metadata or 'uuid' not in admin_metadata:
                continue

            uri = cls.generate_uri(admin_metadata['name'],
                                   admin_metadata['uuid'], base_uri)
            uri_list.append(uri)

        return uri_list
コード例 #14
0
ファイル: storagebroker.py プロジェクト: wgnoehring/dtool-smb
    def _connect(cls, uri, config_path):
        """Open an SMB connection for the configuration named in ``uri``.

        The network location of ``uri`` is the configuration name that is
        appended to every ``DTOOL_SMB_*`` setting looked up here. If no
        password is configured, it is read with ``getpass`` when
        ``cls._connect.num_calls`` equals 1 and stored on the class;
        otherwise the password stored on the class is reused.

        :param uri: dataset or base URI
        :param config_path: path to the dtool config file
        :returns: tuple of (connection, service name, path)
        :raises: RuntimeError if a required setting is missing
        """
        config_name = generous_parse_uri(uri).netloc

        def lookup(key_template):
            # Read one setting belonging to this configuration name.
            return get_config_value(
                key_template.format(config_name),
                config_path=config_path
            )

        # All settings are read first and validated afterwards.
        username = lookup("DTOOL_SMB_USERNAME_{}")
        server_name = lookup("DTOOL_SMB_SERVER_NAME_{}")
        server_port = lookup("DTOOL_SMB_SERVER_PORT_{}")
        domain = lookup("DTOOL_SMB_DOMAIN_{}")
        service_name = lookup("DTOOL_SMB_SERVICE_NAME_{}")
        path = lookup("DTOOL_SMB_PATH_{}")

        # (value, error message template), checked in this order.
        required_settings = (
            (username,
             "No username specified for service '{name}', "
             "please set DTOOL_SMB_USERNAME_{name}."),
            (server_name,
             "No server name specified for service '{name}', "
             "please set DTOOL_SMB_SERVER_NAME_{name}."),
            (server_port,
             "No server port specified for service '{name}', "
             "please set DTOOL_SMB_SERVER_PORT_{name}."),
            (domain,
             "No domain specified for service '{name}', "
             "please set DTOOL_SMB_DOMAIN_{name}."),
            (service_name,
             "No service name specified for service '{name}', "
             "please set DTOOL_SMB_SERVICE_NAME_{name}. "
             "(The service name is the name of the 'share'.)"),
            (path,
             "No path specified for service '{name}', "
             "please set DTOOL_SMB_PATH_{name}."),
        )
        for value, message in required_settings:
            if not value:
                raise RuntimeError(message.format(name=config_name))

        # server_port might be string, i.e. if specified via env vars
        if not isinstance(server_port, int):
            server_port = int(server_port)

        server_ip = socket.gethostbyname(server_name)
        host_name = socket.gethostname()

        password = lookup("DTOOL_SMB_PASSWORD_{}")
        if password is None:
            if cls._connect.num_calls != 1:
                password = cls.password
            else:
                password = getpass.getpass()
                cls.password = password

        conn = SMBConnection(
            username,
            password,
            host_name,
            server_name,
            domain=domain,
            use_ntlm_v2=True,
            is_direct_tcp=True
        )

        connect_message = (
            "Connecting from '{host:s}' to "
            "'smb://{user:s}@{ip:s}({server:s}):{port:d}', "
            "DOMAIN '{domain:s}'"
        )
        logger.info(connect_message.format(
            user=username,
            ip=server_ip,
            server=server_name,
            port=server_port,
            host=host_name,
            domain=domain
        ))

        # for testing, see types of arguments
        types_message = (
            "Types HOST '{host:s}', USER '{user:s}', IP '{ip:s}', "
            "SERVER '{server:s}', PORT '{port:s}', DOMAIN '{domain:s}'"
        )
        logger.debug(types_message.format(
            user=type(username).__name__,
            ip=type(server_ip).__name__,
            server=type(server_name).__name__,
            port=type(server_port).__name__,
            host=type(host_name).__name__,
            domain=type(domain).__name__
        ))

        conn.connect(server_ip, port=server_port)

        return conn, service_name, path
コード例 #15
0
def uri_to_path(uri):
    """Return the path component of ``uri``.

    On Windows the path is converted with ``unix_to_windows_path``.
    """
    uri_path = generous_parse_uri(uri).path
    return unix_to_windows_path(uri_path) if IS_WINDOWS else uri_path