Esempio n. 1
0
    def __init__(self, uri, config_path=None):
        """Initialise the broker for the dataset that ``uri`` points at.

        The bucket name is the network location of the parsed URI and the
        dataset UUID is the URI path with its first character removed.

        :param uri: URI of the dataset
        :param config_path: path to the dtool config file, used when looking
                            up DTOOL_CACHE_DIRECTORY
        :raises: RuntimeError if the ECS endpoint, access key id or secret
                 access key has not been configured for the bucket
        """
        parse_result = generous_parse_uri(uri)

        self.bucket = parse_result.netloc
        self.uuid = parse_result.path[1:]

        # NOTE(review): the per-bucket settings below are looked up without
        # config_path, unlike DTOOL_CACHE_DIRECTORY at the end of this
        # method -- confirm that this is intended.
        ecs_endpoint = get_config_value("DTOOL_ECS_ENDPOINT_{}".format(
            self.bucket))
        ecs_access_key_id = get_config_value(
            "DTOOL_ECS_ACCESS_KEY_ID_{}".format(self.bucket))
        ecs_secret_access_key = get_config_value(
            "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(self.bucket))

        if not ecs_endpoint:
            raise RuntimeError(
                "No ECS endpoint specified for bucket '{bucket}', "
                "please set DTOOL_ECS_ENDPOINT_{bucket}.".format(
                    bucket=self.bucket))
        if not ecs_access_key_id:
            raise RuntimeError(
                "No ECS access key id specified for bucket '{bucket}', "
                "please set DTOOL_ECS_ACCESS_KEY_ID_{bucket}.".format(
                    bucket=self.bucket))
        if not ecs_secret_access_key:
            raise RuntimeError(
                "No ECS secret access key specified for bucket '{bucket}', "
                "please set DTOOL_ECS_SECRET_ACCESS_KEY_{bucket}.".format(
                    bucket=self.bucket))

        session = Session(aws_access_key_id=ecs_access_key_id,
                          aws_secret_access_key=ecs_secret_access_key)

        self.s3resource = session.resource('s3',
                                           endpoint_url=ecs_endpoint,
                                           config=BOTO3_CONFIG)
        self.s3client = session.client('s3',
                                       endpoint_url=ecs_endpoint,
                                       config=BOTO3_CONFIG)

        # Work on a per-instance copy: the registration key is different for
        # every dataset, so writing it into the shared module-level mapping
        # would overwrite the value seen by every other broker instance.
        self._structure_parameters = dict(_ECS_STRUCTURE_PARAMETERS)
        self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
        self._structure_parameters[
            "dataset_registration_key"] = self.dataset_registration_key  # NOQA

        self.data_key_prefix = self._generate_key_prefix("data_key_infix")
        self.fragments_key_prefix = self._generate_key_prefix(
            "fragment_key_infix")
        self.overlays_key_prefix = self._generate_key_prefix(
            "overlays_key_infix")
        self.annotations_key_prefix = self._generate_key_prefix(
            "annotations_key_infix")

        self.http_manifest_key = self._generate_key("http_manifest_key")

        self._s3_cache_abspath = get_config_value("DTOOL_CACHE_DIRECTORY",
                                                  config_path=config_path,
                                                  default=DEFAULT_CACHE_PATH)
Esempio n. 2
0
    def list_dataset_uris(cls, base_uri, config_path):
        """Return list containing URIs with base URI.

        Every object in the bucket whose key starts with ``dtool-`` is
        treated as a dataset registration key; the remainder of the key is
        used as the dataset UUID. A URI is only reported when a broker built
        for it says that admin metadata exists.

        :param base_uri: base URI; its network location names the bucket
        :param config_path: passed on to the broker created for each dataset
        :returns: list of dataset URIs
        """
        # Filtering on the full 'dtool-' prefix guarantees that every key
        # returned can be split into prefix and UUID. The looser 'dtool'
        # prefix raised IndexError on a key without a dash and produced
        # bogus UUIDs for keys such as 'dtoolX-...'.
        registration_prefix = 'dtool-'

        bucket_name = generous_parse_uri(base_uri).netloc

        ecs_endpoint = get_config_value(
            "DTOOL_ECS_ENDPOINT_{}".format(bucket_name))
        ecs_access_key_id = get_config_value(
            "DTOOL_ECS_ACCESS_KEY_ID_{}".format(bucket_name))
        ecs_secret_access_key = get_config_value(
            "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(bucket_name))

        session = Session(aws_access_key_id=ecs_access_key_id,
                          aws_secret_access_key=ecs_secret_access_key)

        resource = session.resource('s3',
                                    endpoint_url=ecs_endpoint,
                                    config=BOTO3_CONFIG)

        bucket = resource.Bucket(bucket_name)

        uri_list = []
        for obj in bucket.objects.filter(Prefix=registration_prefix).all():
            uuid = obj.key[len(registration_prefix):]
            uri = cls.generate_uri(None, uuid, base_uri)

            storage_broker = cls(uri, config_path)
            if storage_broker.has_admin_metadata():
                uri_list.append(uri)

        return uri_list
Esempio n. 3
0
    def _get_resource_and_client(cls, bucket_name):
        """Return an S3 ``(resource, client)`` pair for ``bucket_name``.

        The endpoint, access key id and secret access key are read from the
        dtool configuration. When none of them is set, the plain
        ``boto3.resource``/``boto3.client`` calls are used instead.

        :param bucket_name: name of the bucket, used as the suffix of the
                            configuration keys
        :raises: RuntimeError if only some of the three settings are usable
        :returns: tuple of (s3 resource, s3 client)
        """
        endpoint, key_id, secret_key = [
            get_config_value(template.format(bucket_name))
            for template in (
                "DTOOL_S3_ENDPOINT_{}",
                "DTOOL_S3_ACCESS_KEY_ID_{}",
                "DTOOL_S3_SECRET_ACCESS_KEY_{}",
            )
        ]
        settings = (endpoint, key_id, secret_key)

        # Nothing configured through dtool: use the boto3 defaults.
        if all(value is None for value in settings):
            return boto3.resource('s3'), boto3.client('s3')

        # At least one setting is present, so all three have to be usable.
        if not all(settings):
            raise RuntimeError(
                "If you want to configure your S3 endpoint for bucket "
                "'{bucket}' via the dtool config file, please set "
                "DTOOL_S3_ENDPOINT_{bucket}, "
                "DTOOL_S3_ACCESS_KEY_ID_{bucket} and "
                "DTOOL_S3_SECRET_ACCESS_KEY_{bucket}.".format(
                    bucket=bucket_name))

        session = Session(aws_access_key_id=key_id,
                          aws_secret_access_key=secret_key)
        return (session.resource('s3', endpoint_url=endpoint),
                session.client('s3', endpoint_url=endpoint))
Esempio n. 4
0
    def __init__(self, uri, config_path=None):
        """Initialise the broker for the dataset that ``uri`` points at.

        The bucket name is the network location of the parsed URI and the
        dataset UUID is the URI path with its first character removed.

        :param uri: URI of the dataset
        :param config_path: path to the dtool config file, used when looking
                            up DTOOL_CACHE_DIRECTORY
        """
        parse_result = generous_parse_uri(uri)
        self.bucket = parse_result.netloc
        self.uuid = parse_result.path[1:]

        # NOTE(review): looked up without config_path, unlike
        # DTOOL_CACHE_DIRECTORY below -- confirm that this is intended.
        self.dataset_prefix = get_config_value("DTOOL_S3_DATASET_PREFIX")

        self.s3resource, self.s3client = \
            self._get_resource_and_client(self.bucket)

        # Work on a per-instance copy: the registration key is different for
        # every dataset, so writing it into the shared module-level mapping
        # would overwrite the value seen by every other broker instance.
        self._structure_parameters = dict(_STRUCTURE_PARAMETERS)
        self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
        self._structure_parameters[
            "dataset_registration_key"] = self.dataset_registration_key  # NOQA

        self.data_key_prefix = self._generate_key_prefix("data_key_infix")
        self.fragments_key_prefix = self._generate_key_prefix(
            "fragment_key_infix")
        self.overlays_key_prefix = self._generate_key_prefix(
            "overlays_key_infix")
        self.annotations_key_prefix = self._generate_key_prefix(
            "annotations_key_infix")
        self.tags_key_prefix = self._generate_key_prefix("tags_key_infix")

        self.http_manifest_key = self._generate_key("http_manifest_key")

        self._s3_cache_abspath = get_config_value("DTOOL_CACHE_DIRECTORY",
                                                  config_path=config_path,
                                                  default=DEFAULT_CACHE_PATH)
Esempio n. 5
0
def test_get_config_value_from_file(tmp_dir_fixture):  # NOQA
    """Check config lookups before and after the config file is written."""
    from dtoolcore.utils import get_config_value, get_config_value_from_file

    key = "MY_KEY"
    config_path = os.path.join(tmp_dir_fixture, "my.conf")

    # The config file has not been written yet.
    missing = get_config_value_from_file(key=key, config_path=config_path)
    assert missing is None

    with open(config_path, "w") as fh:
        json.dump({key: "from_file"}, fh)

    # The config file now exists and holds the key.
    stored = get_config_value_from_file(key=key, config_path=config_path)
    assert "from_file" == stored

    # get_config_value returns the value from the file, not the default.
    resolved = get_config_value(key=key, config_path=config_path,
                                default="hello")
    assert resolved == "from_file"
Esempio n. 6
0
    def __init__(self, uri, config_path=None):
        """Derive the dataset paths from ``uri`` and reset the caches.

        :param uri: location of the dataset; made absolute with
                    ``os.path.abspath``
        :param config_path: path to the dtool config file, used when looking
                            up DTOOL_IRODS_CACHE_DIRECTORY
        """
        root = os.path.abspath(uri)
        dtool_dir = os.path.join(root, '.dtool')

        # Entries directly below the dataset root.
        self._abspath = root
        self._data_abspath = os.path.join(root, 'data')
        self._readme_abspath = os.path.join(root, 'README.yml')

        # Entries below the '.dtool' directory.
        self._dtool_abspath = dtool_dir
        self._admin_metadata_fpath = os.path.join(dtool_dir, 'dtool')
        self._manifest_abspath = os.path.join(dtool_dir, 'manifest.json')
        self._overlays_abspath = os.path.join(dtool_dir, 'overlays')
        self._metadata_fragments_abspath = os.path.join(
            dtool_dir, 'tmp_fragments')

        fallback_cache_dir = os.path.expanduser("~/.cache/dtool/irods")
        self._irods_cache_abspath = get_config_value(
            "DTOOL_IRODS_CACHE_DIRECTORY",
            config_path=config_path,
            default=fallback_cache_dir
        )

        # Cache for optimisation; switched off and empty to begin with.
        self._use_cache = False
        self._ls_abspath_cache = {}
        self._metadata_cache = {}
        self._size_and_timestamp_cache = {}
        self._metadata_dir_exists_cache = None
Esempio n. 7
0
    def __init__(self, uri, config_path=None):
        """Initialise the broker for the dataset that ``uri`` points at.

        The storage account name is the network location of the parsed URI
        and the dataset UUID is the URI path with its first character
        removed.

        :param uri: URI of the dataset
        :param config_path: path to the dtool config file; used for
                            DTOOL_CACHE_DIRECTORY and handed on to
                            ``get_blob_service``
        """
        parsed = generous_parse_uri(uri)
        self.storage_account_name = parsed.netloc
        self.uuid = parsed.path[1:]

        # Each of these attributes is named after the key it is built from.
        for key_name in (
            'fragments_key_prefix',
            'overlays_key_prefix',
            'annotations_key_prefix',
            'tags_key_prefix',
            'http_manifest_key',
        ):
            setattr(self, key_name, self._generate_key(key_name))

        self._azure_cache_abspath = get_config_value(
            "DTOOL_CACHE_DIRECTORY",
            config_path=config_path,
            default=DEFAULT_CACHE_PATH)

        self._blobservice = get_blob_service(
            self.storage_account_name, config_path)
Esempio n. 8
0
    def http_enable(self):
        """Write the HTTP manifest and return the access URL of the dataset.

        DTOOL_S3_PUBLISH_EXPIRY is read from the dtool configuration. When it
        is unset or an empty string it is passed on unchanged; otherwise it
        is converted to an integer first. If the manifest URL carries a
        query string, that query string is appended to the access URL.

        :raises: RuntimeError if DTOOL_S3_PUBLISH_EXPIRY is set to something
                 that cannot be converted to an integer
        :returns: access URL of the dataset
        """
        logger.debug("HTTP enable {}".format(self))

        expiry = get_config_value("DTOOL_S3_PUBLISH_EXPIRY")
        if expiry is not None and expiry != "":
            try:
                expiry = int(expiry)
            except (TypeError, ValueError):
                message = (
                    "DTOOL_S3_PUBLISH_EXPIRY must be set to a value that "
                    "can be converted to an integer"
                )
                logger.error(message)
                # Carry the explanation in the exception as well, so that
                # callers without log output still see what went wrong.
                raise RuntimeError(message)

        http_manifest = self._generate_http_manifest(expiry)
        manifest_url = self._write_http_manifest(http_manifest, expiry)

        # Split on the first "?" only, so a query string that itself
        # contains "?" is kept whole instead of breaking the unpacking.
        _, separator, manifest_presignature = manifest_url.partition("?")

        access_url = self._url(self._get_prefix() + self.uuid)
        if separator:
            access_url = access_url + "?" + manifest_presignature

        return access_url
Esempio n. 9
0
def test_get_config_value():
    """Check that the default comes back when no config path is given."""
    from dtoolcore.utils import get_config_value

    fallback = "hello"
    resolved = get_config_value(key="MY_KEY", config_path=None,
                                default=fallback)
    assert resolved == fallback
Esempio n. 10
0
def test_get_config_value_from_file(tmp_dir_fixture):  # NOQA
    """Check that a value stored in the config file beats the default."""
    from dtoolcore.utils import get_config_value

    key, stored_value = "MY_KEY", "from_file"
    config_path = os.path.join(tmp_dir_fixture, "my.conf")

    with open(config_path, "w") as fh:
        json.dump({key: stored_value}, fh)

    resolved = get_config_value(key=key, config_path=config_path,
                                default="hello")
    assert resolved == stored_value
Esempio n. 11
0
    def __init__(self, uri, config_path=None):
        """Connect to the SMB server and set up the dataset paths.

        The configuration name is the network location of the parsed URI and
        the dataset UUID is the URI path with its first character removed.

        :param uri: URI of the dataset
        :param config_path: path to the dtool config file; handed on to
                            ``_connect`` and used for DTOOL_CACHE_DIRECTORY
        """
        parsed = generous_parse_uri(uri)
        self.config_name = parsed.netloc
        self.uuid = parsed.path[1:]

        # Connect to SMB server.
        connection = SMBStorageBroker._connect(uri, config_path)
        self.conn, self.service_name, self.path = connection

        # Attribute name -> key handed to _generate_path, in creation order.
        path_attributes = (
            ("_data_path", "data_directory"),
            ("_overlays_path", "overlays_directory"),
            ("_annotations_path", "annotations_directory"),
            ("_tags_path", "tags_directory"),
            ("_metadata_fragments_path", "metadata_fragments_directory"),
        )
        for attribute, directory_key in path_attributes:
            setattr(self, attribute, self._generate_path(directory_key))

        # Define some essential directories to be created.
        dtool_path = self._generate_path("dtool_directory")
        self._essential_subdirectories = [
            dtool_path,
            self._data_path,
            self._overlays_path,
            self._annotations_path,
            self._tags_path,
        ]

        # Cache for file hashes computed on upload
        self._hash_cache = {}

        self._smb_cache_abspath = get_config_value(
            "DTOOL_CACHE_DIRECTORY",
            config_path=config_path,
            default=DEFAULT_CACHE_PATH
        )
Esempio n. 12
0
    def _connect(cls, uri, config_path):
        """Open an SMB connection for the service named in ``uri``.

        The network location of the parsed URI is used as the configuration
        name. Username, server name, server port, domain, service name and
        path are read from the DTOOL_SMB_*_<name> settings and must all be
        set. The password is read from DTOOL_SMB_PASSWORD_<name>; if that is
        not set, it is either asked for with ``getpass`` or taken from
        ``cls.password``.

        :param uri: URI whose network location names the configuration
        :param config_path: path to the dtool config file
        :raises: RuntimeError if one of the required settings is missing
        :returns: tuple of (connection, service name, path)
        """
        parse_result = generous_parse_uri(uri)

        config_name = parse_result.netloc

        # Read all required per-service settings before validating them.
        username = get_config_value(
            "DTOOL_SMB_USERNAME_{}".format(config_name),
            config_path=config_path
        )
        server_name = get_config_value(
            "DTOOL_SMB_SERVER_NAME_{}".format(config_name),
            config_path=config_path
        )
        server_port = get_config_value(
            "DTOOL_SMB_SERVER_PORT_{}".format(config_name),
            config_path=config_path
        )
        domain = get_config_value(
            "DTOOL_SMB_DOMAIN_{}".format(config_name),
            config_path=config_path
        )
        service_name = get_config_value(
            "DTOOL_SMB_SERVICE_NAME_{}".format(config_name),
            config_path=config_path
        )
        path = get_config_value(
            "DTOOL_SMB_PATH_{}".format(config_name),
            config_path=config_path
        )

        # Any falsy value (None, empty string, 0) counts as "not specified".
        if not username:
            raise RuntimeError("No username specified for service '{name}', "
                               "please set DTOOL_SMB_USERNAME_{name}."
                               .format(name=config_name))
        if not server_name:
            raise RuntimeError("No server name specified for service '{name}', "
                               "please set DTOOL_SMB_SERVER_NAME_{name}."
                               .format(name=config_name))
        if not server_port:
            raise RuntimeError("No server port specified for service '{name}', "
                               "please set DTOOL_SMB_SERVER_PORT_{name}."
                               .format(name=config_name))
        if not domain:
            raise RuntimeError("No domain specified for service '{name}', "
                               "please set DTOOL_SMB_DOMAIN_{name}."
                               .format(name=config_name))
        if not service_name:
            raise RuntimeError("No service name specified for service '{name}', "
                               "please set DTOOL_SMB_SERVICE_NAME_{name}. "
                               "(The service name is the name of the 'share'.)"
                               .format(name=config_name))
        if not path:
            raise RuntimeError("No path specified for service '{name}', "
                               "please set DTOOL_SMB_PATH_{name}."
                               .format(name=config_name))

        # server_port might be a string, e.g. if specified via env vars.
        # The '{port:d}' log format below needs an integer.
        if not isinstance(server_port, int):
            server_port = int(server_port)

        server_ip = socket.gethostbyname(server_name)
        host_name = socket.gethostname()
        password = get_config_value(
            "DTOOL_SMB_PASSWORD_{}".format(config_name),
            config_path=config_path
        )
        if password is None:
            # NOTE(review): `num_calls` is not defined in this block;
            # presumably a call-counting decorator wraps this method --
            # confirm. The password is asked for when the counter equals 1
            # and stored on the class; otherwise the stored one is reused.
            if cls._connect.num_calls == 1:
                password = getpass.getpass()
                cls.password = password
            else:
                password = cls.password
        conn = SMBConnection(username, password, host_name, server_name,
            domain=domain, use_ntlm_v2=True, is_direct_tcp=True)

        logger.info( ( "Connecting from '{host:s}' to "
            "'smb://{user:s}@{ip:s}({server:s}):{port:d}', "
            "DOMAIN '{domain:s}'").format(user=username,
                ip=server_ip, server=server_name,
                port=server_port, host=host_name,
                domain=domain) )

        # For testing: log the type name of each connection argument.
        logger.debug( ( "Types HOST '{host:s}', USER '{user:s}', IP '{ip:s}', "
           "SERVER '{server:s}', PORT '{port:s}', DOMAIN '{domain:s}'").format(
                user=type(username).__name__,
                ip=type(server_ip).__name__,
                server=type(server_name).__name__,
                port=type(server_port).__name__,
                host=type(host_name).__name__,
                domain=type(domain).__name__))

        conn.connect(server_ip, port=server_port)

        return conn, service_name, path
Esempio n. 13
0
def get_azure_account_key(account_name, config_path):
    """Look up the Azure account key configured for ``account_name``.

    :param account_name: name of the account; appended to
                         ``DTOOL_AZURE_ACCOUNT_KEY_`` to form the config key
    :param config_path: path to the dtool config file
    :returns: whatever ``get_config_value`` returns for that key
    """
    config_key = "DTOOL_AZURE_ACCOUNT_KEY_" + account_name
    return get_config_value(config_key, config_path=config_path)