Example #1
0
def test_credentials(url, expected, creds_yaml):
    """Inject credentials loaded from a yaml document and compare with the expected url."""
    yaml_stream = io.StringIO(creds_yaml)
    empty_injector = injection.CredentialsInjector()
    injector = reader.add_credentials_from_reader(empty_injector, yaml_stream)

    injected = injector.inject(urls.URL(url))
    expected_url = urls.URL(expected)
    assert injected == expected_url
def file_descriptor():
    """Build a sample descriptor whose mime type is not the folder mime type."""
    fields = dict(
        id_="123",
        name="file",
        parents=["0"],
        url=urls.URL("gdrive://My Drive/"),
        mime_type="application/thingy",
    )
    return _GoogleFileDescriptor(**fields)
def folder_descriptor():
    """Build a sample descriptor that uses the folder mime type."""
    fields = dict(
        id_="124",
        name="folder",
        parents=["0"],
        url=urls.URL("gdrive://My Drive/"),
        mime_type=_GoogleFileDescriptor.FOLDER_MIME_TYPE,
    )
    return _GoogleFileDescriptor(**fields)
Example #4
0
def test_add_credentials():
    """Expect a single url registered for `scheme`: the `TENTACLIO__CONN__DB` one."""
    connection_url = "scheme://mydb/database"
    env = {
        "one_var": "scheme://*****:*****@hostname",
        "TENTACLIO__CONN__DB": connection_url,
    }

    injector = add_credentials_from_env(CredentialsInjector(), env)

    registered = injector.registry["scheme"]
    assert len(registered) == 1
    assert registered[0] == urls.URL(connection_url)
Example #5
0
    def test_parsing_file_url(self, url, path):
        """A file url exposes only scheme and path; the other components are None."""
        parsed = urls.URL(url)

        assert parsed.scheme == "file"
        # checked in the same order as the individual attribute assertions
        for component in ("hostname", "username", "password", "port"):
            assert getattr(parsed, component) is None
        assert parsed.path == path
Example #6
0
 def __init__(self, url: Union[urls.URL, str]) -> None:
     """Store the client url, parsing it first when it is given as a string.

     Raises ValueError when the url scheme is not listed in `allowed_schemes`.
     """
     self.url = urls.URL(url) if isinstance(url, str) else url

     scheme = self.url.scheme
     if scheme in self.allowed_schemes:
         return

     client_name = type(self).__name__
     raise ValueError(
         f"Allowed schemes for {client_name} are {self.allowed_schemes}; "
         f"found '{scheme}'")
Example #7
0
def test_authenticate(mocker):
    """Authenticating a url fills in the credentials registered for its host."""
    injector = credentials.CredentialsInjector()
    # The registered credentials have to match the values asserted below;
    # the previous masked placeholder ("*****:*****") could never satisfy them.
    injector.register_credentials(urls.URL("ftp://user:pass@google.com"))

    # Make `authenticate` use the injector prepared above instead of loading one.
    mock_cred = mocker.patch(
        "tentaclio.credentials.api.load_credentials_injector")
    mock_cred.return_value = injector

    authenticated = credentials.authenticate("ftp://google.com/myfile.csv")

    assert authenticated.username == "user"
    assert authenticated.password == "pass"
Example #8
0
 def parser(line):
     """Convert one listing line into an entry appended to the enclosing `entries`.

     The last whitespace-separated field is used as the file name and a
     leading ``d`` in the first field (e.g. ``drwxrwx---``) marks a folder.
     Blank lines are ignored.
     """
     # No `nonlocal` declaration is needed: `entries` is mutated, never rebound.
     parts = line.split()
     if not parts:
         # nothing to parse; indexing an empty list below would raise IndexError
         return

     url = urls.URL(base_url + parts[-1])
     if parts[0].startswith("d"):
         entries.append(fs.build_folder_entry(url))
     else:
         entries.append(fs.build_file_entry(url))
Example #9
0
def _get_connection_urls(env: Env) -> List[urls.URL]:
    """Parse into urls the values of the variables named with the connection prefix.

    A value that fails to parse is logged by variable name and the error is re-raised.
    """
    parsed_urls = []
    for name, value in env.items():
        if not name.startswith(TENTACLIO_CONN_PREFIX):
            continue
        try:
            parsed = urls.URL(value)
        except Exception as err:
            logger.error(
                f"Error parsing credentials from the environment variable {name}"
            )
            raise err
        parsed_urls.append(parsed)
    return parsed_urls
Example #10
0
 def _build_descriptors(
         self, files: List[Any]) -> Iterable[_GoogleFileDescriptor]:
     """Yield one descriptor per file record.

     The descriptor url is `url_base` followed by the file name, or None when
     no `url_base` is set.
     """
     for record in files:
         file_id = record.get("id")
         name = record.get("name")
         mime_type = record.get("mimeType")
         parents = record.get("parents")

         url = None
         if self.url_base is not None:
             url = urls.URL(self.url_base + name)

         yield _GoogleFileDescriptor(
             id_=file_id,
             name=name,
             mime_type=mime_type,
             parents=parents,
             url=url,
         )
Example #11
0
    def scandir(self, **kwargs) -> Iterable[fs.DirEntry]:
        """List the folders and regular files found under the connection url path."""
        prefix = f"sftp://{self.url.hostname}:{self.port}{self.url.path}/"
        found = []
        for item in self.conn.listdir_attr(self.url.path):
            item_url = urls.URL(prefix + item.filename)
            mode = item.st_mode
            if stat.S_ISDIR(mode):
                build_entry = fs.build_folder_entry
            elif stat.S_ISREG(mode):
                build_entry = fs.build_file_entry
            else:
                # neither a directory nor a regular file: leave it out
                continue
            found.append(build_entry(item_url))
        return found
Example #12
0
    def _scan_mlds(self, base_url):
        """Build dir entries from the MLSD listing of the connection url path.

        Each listed name is appended to `base_url` to form the entry url.
        Entries of type ``dir`` become folder entries and the rest file entries,
        except ``cdir`` and ``pdir`` (the listed directory itself and its
        parent), which are skipped as they are not children of the path.
        """
        entries = []
        # https://docs.python.org/3/library/ftplib.html#ftplib.FTP.mlsd
        # mlsd yields (name, facts) pairs
        for file_name, facts in self.conn.mlsd(self.url.path, facts=["type"]):
            # fact values are case-insensitive (RFC 3659)
            entry_type = facts["type"].lower()
            if entry_type in ("cdir", "pdir"):
                continue

            url = urls.URL(base_url + file_name)
            if entry_type == "dir":
                entries.append(fs.build_folder_entry(url))
            else:
                entries.append(fs.build_file_entry(url))

        return entries
Example #13
0
def add_credentials_from_reader(
        injector: injection.CredentialsInjector,
        yaml_reader: protocols.Reader) -> injection.CredentialsInjector:
    """Read the credentials from a yml and register them in the injector.

    The file has the following format:
        secrets:
            my_creds_name: http://user:password@hostname/path
            my_db: postgres://user_db:password@hostname/database

    The injector received is returned once every url has been registered. If a
    url cannot be registered the error is logged and re-raised.
    """
    creds = _load_creds_from_yaml(yaml_reader)
    for name, url in creds.items():
        logger.info(f"Adding secret: {name}")
        try:
            injector.register_credentials(urls.URL(url))
        except Exception:
            # Only the secret name is logged: the url embeds the password and
            # must not end up in the logs.
            logger.error(
                f"Error while registering credentials {name} from file")
            raise

    return injector
Example #14
0
def test_handler_is_registered(url, scheme):
    """The url parses and reports the expected scheme."""
    parsed = urls.URL(url)
    assert parsed.scheme == scheme
Example #15
0
class GoogleDriveFSClient(base_client.BaseClient["GoogleDriveFSClient"]):
    """Allow filesystem-like access to google drive.

    Google drive follows a drive oriented architecture more reminiscent of windows filesystems
    than unix approaches. This makes it a bit complicated to present the resources as URLs.

    From the user perspective accessing the resources works as follows:
    * urls MUST have an empty hostname `gdrive:///My Drive/` or `gdrive:/My Drive/`

    * the first element of the path has to be the drive name i.e. `My Drive` for the default
    drive or the drive name as it appears in the web ui for shared drives.
    """

    DEFAULT_DRIVE_NAME = "My Drive"
    DEFAULT_DRIVE_ID = "root"
    DEFAULT_DRIVE_DESCRIPTOR = _GoogleDriveDescriptor(
        id_=DEFAULT_DRIVE_ID,
        name=DEFAULT_DRIVE_NAME,
        root_descriptor=_GoogleFileDescriptor(
            id_=DEFAULT_DRIVE_ID,
            name=DEFAULT_DRIVE_NAME,
            mime_type=_GoogleFileDescriptor.FOLDER_MIME_TYPE,
            # There is no `self` in the class body, so the drive name constant
            # is interpolated directly.
            url=urls.URL(f"gdrive:///{DEFAULT_DRIVE_NAME}"),
            parents=[],
        ),
    )

    allowed_schemes = ["gdrive", "googledrive"]

    # First element of the url path
    drive_name: str
    # Remaining elements of the url path, after the drive name
    path_parts: Tuple[str, ...]

    # Not an easy task to figure out the type of the
    # returned value from the library
    _service: Optional[Any] = None

    def __init__(self, url: Union[urls.URL, str]) -> None:
        """Create a new GoogleDriveFSClient.

        The url path is split into the drive name (first element) and the
        path parts inside the drive. A ValueError is raised when the path has
        no elements at all.
        """
        super().__init__(url)

        # drop the empty elements produced by leading, trailing or repeated slashes
        parts = [part for part in self.url.path.split("/") if part]
        if not parts:
            raise ValueError(
                f"Bad url: {self.url.path} :Google Drive needs at least "
                "the drive part (i.e. gdrive:///My Drive/)")
        self.drive_name = parts[0]
        self.path_parts = tuple(parts[1:])

    @property
    def _drive(self):
        """Descriptor of the drive named in the url; ValueError if it is unknown."""
        drives = self._get_drives()
        if self.drive_name not in drives:
            names = list(drives)
            raise ValueError(f"Drive name (hostname) should be one of {names}")
        return drives[self.drive_name]

    def _connect(self) -> "GoogleDriveFSClient":
        self._refresh_service()
        return self

    def _refresh_service(self, token_file: str = TOKEN_FILE):
        """Build the drive service from the credentials in `token_file` if not built yet."""
        if self._service is not None:
            return
        creds = _load_credentials(token_file)
        self._service = build("drive", "v3", credentials=creds)

    def close(self) -> None:
        """Close the dummy connection to google drive."""
        self.closed = True

    # Stream methods:

    def get(self, writer: protocols.ByteWriter, **kwargs) -> None:
        """Get the contents of the google drive file."""
        leaf_descriptor = self._get_leaf_descriptor()
        _DownloadRequest(self._service, leaf_descriptor.id_, writer).execute()

    def put(self, reader: protocols.ByteReader, **kwargs) -> None:
        """Write the contents of the reader to the google drive file.

        The file is updated when it already exists and created otherwise.
        """
        # Only the lookup is guarded: an IOError raised while uploading must
        # propagate instead of triggering the creation of a second file.
        try:
            file_descriptor = self._get_leaf_descriptor()
        except IOError:
            # file doesn't exist, then create
            self._create(reader)
        else:
            self._update(file_descriptor, reader)

    def _create(self, reader: protocols.ByteReader):
        """Create a new file under the parent folder of the url path.

        Raises IOError when the parent path is not a folder.
        """
        descriptors = self._get_path_descriptors(ignore_tail=True)

        parent = descriptors[-1]
        if not parent.is_dir:
            raise IOError(f"{self.url} parent path is not a folder")

        uploader = _CreateRequest(service=self._service,
                                  name=self.path_parts[-1],
                                  parent_id=parent.id_,
                                  reader=reader)
        uploader.execute()

    def _update(self, file_descriptor: _GoogleFileDescriptor,
                reader: protocols.ByteReader):
        """Update file contents."""
        uploader = _UpdateRequest(
            service=self._service,
            name=file_descriptor.name,
            file_id=file_descriptor.id_,
            reader=reader,
        )
        uploader.execute()

    # scandir related methods

    @decorators.check_conn
    def scandir(self, **kwargs) -> Iterable[fs.DirEntry]:
        """List contents of a folder from google drive.

        Raises IOError when the url does not point to a folder.
        """
        leaf_descriptor = self._get_leaf_descriptor()

        if not leaf_descriptor.is_dir:
            raise IOError(f"{self.url} is not a folder")

        # exactly one trailing slash, whatever the url looked like
        url_base = str(self.url).rstrip("/") + "/"
        lister = _ListFilesRequest(self._service,
                                   url_base=url_base,
                                   q=f"'{leaf_descriptor.id_}' in parents")
        return lister.list()

    # remove

    def remove(self) -> None:
        """Remove the file from google drive."""
        leaf_descriptor = self._get_leaf_descriptor()
        args = {
            "fileId": leaf_descriptor.id_,
            "supportsTeamDrives": True,
        }
        self._service.files().delete(**args).execute()

    # NOTE(review): lru_cache on a method keys on `self` and holds a reference
    # to the cached instance; with maxsize=1 only the drives of the most
    # recently used client stay cached.
    @functools.lru_cache(maxsize=1)
    def _get_drives(self) -> Dict[str, _GoogleDriveDescriptor]:
        """Map drive names to descriptors, including the default drive."""
        drives = {d.name: d for d in _ListDrivesRequest(self._service).list()}
        drives[self.DEFAULT_DRIVE_NAME] = self.DEFAULT_DRIVE_DESCRIPTOR
        return drives

    def _get_leaf_descriptor(self) -> _GoogleFileDescriptor:
        """Get the last descriptor from the path part of the url."""
        return self._get_path_descriptors()[-1]

    def _get_path_descriptors(self,
                              ignore_tail=False
                              ) -> List[_GoogleFileDescriptor]:
        """Get the descriptors of the url path, optionally leaving out its last part."""
        parts = self.path_parts
        if ignore_tail:
            parts = parts[:-1]
        return list(self._path_parts_to_descriptors(self._drive, parts))

    def _path_parts_to_descriptors(
            self, drive: _GoogleDriveDescriptor,
            path_parts: Iterable[str]) -> List[_GoogleFileDescriptor]:
        """Convert the path parts into google drive descriptors.

        The returned list starts with the root descriptor of the drive.
        """
        file_descriptors = [drive.root_descriptor]
        # NOTE(review): the first part is looked up by name only, without a
        # parent filter; each later part is restricted to the previous one.
        parent = None
        for path_part in path_parts:
            file_descriptor = self._get_file_descriptor_by_name(
                path_part, parent)
            parent = file_descriptor.id_
            file_descriptors.append(file_descriptor)

        return file_descriptors

    def _get_file_descriptor_by_name(self,
                                     name: str,
                                     parent: Optional[str] = None):
        """Get the file descriptor given the file name and its parent.

        The first result of the query is returned; IOError is raised when the
        query gives no results.
        """
        # Backslashes and single quotes have to be escaped inside the quoted
        # value, otherwise a name like "Valentine's Day" breaks the query.
        escaped_name = name.replace("\\", "\\\\").replace("'", "\\'")
        args = {"q": f" name = '{escaped_name}'"}

        if parent is not None:
            args["q"] += f" and '{parent}' in parents"

        results = list(_ListFilesRequest(self._service, **args).list())

        if not results:
            raise IOError(
                f"Descriptor not found for {self.url} "
                f"Could not find part {name} with parent id {parent}")
        return results[0]
Example #16
0
 def test_missing_url(self):
     """Building a url out of None raises URLError."""
     missing = None
     with pytest.raises(urls.URLError):
         urls.URL(missing)
Example #17
0
def _build_url(bucket: str, prefix: str) -> urls.URL:
    """Build the s3 url for a bucket and prefix, without the prefix trailing slashes."""
    # a falsy prefix is interpolated as it is
    key = prefix.rstrip("/") if prefix else prefix
    return urls.URL(f"s3://{bucket}/{key}")
Example #18
0
    def test_url_escaped_fields(self, url, username, password):
        """The parsed url exposes the expected username and password."""
        parsed = urls.URL(url)

        found = (parsed.username, parsed.password)
        assert found == (username, password)
Example #19
0
 def test_string_hides_password(self):
     """Compare the string form of a url against the expected literal.

     NOTE(review): the credentials in both literals look redacted; confirm
     the intended input and expected values.
     """
     rendered = str(urls.URL("scheme://*****:*****@hostname.com"))
     assert rendered == "scheme://*****:*****@hostname.com"
Example #20
0
 def test_copy(self, original, components, expected):
     """Copying a url with replaced components gives the expected url."""
     source = urls.URL(original)
     copied = source.copy(**components)
     assert copied == urls.URL(expected)
Example #21
0
 def test_url_equality(self, url_1, url_2, should_be_equal):
     """Two parsed urls compare equal exactly when expected to."""
     left = urls.URL(url_1)
     right = urls.URL(url_2)
     are_equal = left == right
     assert are_equal == should_be_equal
Example #22
0
def authenticate(url: str) -> urls.URL:
    """Parse the url and pass it through the loaded credentials injector."""
    injector = load_credentials_injector()
    parsed = urls.URL(url)
    return injector.inject(parsed)
Example #23
0
 def _build_bucket_entries(self) -> Iterable[fs.DirEntry]:
     """Lazily build a directory entry (`s3://<name>`) for each bucket name."""
     # the bucket names are fetched right away; only the entries are built lazily
     bucket_names = self._get_buckets()
     return (
         fs.DirEntry(url=urls.URL("s3://" + bucket), is_dir=True, is_file=False)
         for bucket in bucket_names
     )
Example #24
0
def _from_os_dir_entry(original: os.DirEntry) -> fs.DirEntry:
    """Convert an os dir entry into an fs one with a `file://` url of its absolute path."""
    absolute_path = os.path.abspath(original.path)
    url = urls.URL("file://" + absolute_path)
    is_dir = bool(original.is_dir())
    is_file = bool(original.is_file())
    return fs.DirEntry(url=url, is_dir=is_dir, is_file=is_file)