def test_credentials(url, expected, creds_yaml):
    """Check that credentials read from the yaml text are injected into the url."""
    yaml_stream = io.StringIO(creds_yaml)
    empty_injector = injection.CredentialsInjector()
    injector = reader.add_credentials_from_reader(empty_injector, yaml_stream)

    injected = injector.inject(urls.URL(url))

    assert injected == urls.URL(expected)
def file_descriptor():
    """Build a sample descriptor with a non-folder mime type."""
    fields = {
        "id_": "123",
        "name": "file",
        "parents": ["0"],
        "url": urls.URL("gdrive://My Drive/"),
        "mime_type": "application/thingy",
    }
    return _GoogleFileDescriptor(**fields)
def folder_descriptor():
    """Build a sample descriptor whose mime type is the folder mime type."""
    fields = {
        "id_": "124",
        "name": "folder",
        "parents": ["0"],
        "url": urls.URL("gdrive://My Drive/"),
        "mime_type": _GoogleFileDescriptor.FOLDER_MIME_TYPE,
    }
    return _GoogleFileDescriptor(**fields)
def test_add_credentials():
    """Check that exactly one url, the TENTACLIO__CONN__DB one, gets registered."""
    env = {
        "one_var": "scheme://*****:*****@hostname",
        "TENTACLIO__CONN__DB": "scheme://mydb/database",
    }

    injector = add_credentials_from_env(CredentialsInjector(), env)

    registered = injector.registry["scheme"]
    assert len(registered) == 1
    assert registered[0] == urls.URL("scheme://mydb/database")
def test_parsing_file_url(self, url, path):
    """Check that a file url exposes only scheme and path."""
    parsed_url = urls.URL(url)

    assert parsed_url.scheme == "file"
    # None of the authority components should be populated.
    for component in ("hostname", "username", "password", "port"):
        assert getattr(parsed_url, component) is None
    assert parsed_url.path == path
def __init__(self, url: Union[urls.URL, str]) -> None:
    """Create a new client based on a URL object or a string containing a url.

    Raises:
        ValueError: if the scheme of the url is not in ``allowed_schemes``.
    """
    self.url = urls.URL(url) if isinstance(url, str) else url

    if self.url.scheme in self.allowed_schemes:
        return

    raise ValueError(
        f"Allowed schemes for {type(self).__name__} are {self.allowed_schemes}; "
        f"found '{self.url.scheme}'")
def test_authenticate(mocker):
    """Check that authenticate fills in the credentials from the patched injector."""
    injector = credentials.CredentialsInjector()
    injector.register_credentials(urls.URL("ftp://*****:*****@google.com"))
    # Make the api hand back the injector prepared above.
    mocker.patch(
        "tentaclio.credentials.api.load_credentials_injector",
        return_value=injector,
    )

    authenticated = credentials.authenticate("ftp://google.com/myfile.csv")

    assert authenticated.username == "user"
    assert authenticated.password == "pass"
def parser(line):
    """Convert one line of a directory listing into an entry.

    The last whitespace-separated field of the line is used as the file name
    and the first character of the line tells directories apart from files.
    The resulting entry is appended to ``entries``; both ``entries`` and
    ``base_url`` are taken from the enclosing scope. ``entries`` is only
    mutated, never rebound, so no ``nonlocal`` declaration is needed.

    Blank lines are skipped.
    """
    parts = line.split()
    if not parts:
        # Nothing to parse; indexing an empty split would raise IndexError.
        return

    entry_url = urls.URL(base_url + parts[-1])
    # Directory lines start with "d", e.g. "drwxrwx---".
    if parts[0][0] == "d":
        entries.append(fs.build_folder_entry(entry_url))
    else:
        entries.append(fs.build_file_entry(entry_url))
def _get_connection_urls(env: Env) -> List[urls.URL]:
    """Parse the urls held in variables whose name starts with the connection prefix.

    A value that cannot be parsed is logged (by variable name only) and the
    error is re-raised.
    """
    parsed: List[urls.URL] = []
    prefixed = (
        (name, value)
        for name, value in env.items()
        if name.startswith(TENTACLIO_CONN_PREFIX)
    )
    for key, raw_url in prefixed:
        try:
            parsed.append(urls.URL(raw_url))
        except Exception as error:
            logger.error(
                f"Error parsing credentials from the environment variable {key}"
            )
            raise error
    return parsed
def _build_descriptors(
        self, files: List[Any]) -> Iterable[_GoogleFileDescriptor]:
    """Yield one descriptor per file record.

    The descriptor url is ``url_base`` followed by the file name, or ``None``
    when no ``url_base`` is set.
    """
    for record in files:
        file_id = record.get("id")
        name = record.get("name")
        mime_type = record.get("mimeType")
        parents = record.get("parents")

        url = None
        if self.url_base is not None:
            url = urls.URL(self.url_base + name)

        yield _GoogleFileDescriptor(
            id_=file_id,
            name=name,
            mime_type=mime_type,
            parents=parents,
            url=url,
        )
def scandir(self, **kwargs) -> Iterable[fs.DirEntry]:
    """Scan the connection url to create dir entries.

    Directories and regular files found under the url path are returned as
    folder and file entries respectively; any other kind of entry is ignored.
    """
    # Drop trailing slashes before appending the separator so that a path
    # like "/" or "/dir/" does not produce "//" in the entry urls.
    base_path = (self.url.path or "").rstrip("/")
    base_url = f"sftp://{self.url.hostname}:{self.port}{base_path}/"

    entries = []
    for attrs in self.conn.listdir_attr(self.url.path):
        url = urls.URL(base_url + attrs.filename)
        if stat.S_ISDIR(attrs.st_mode):
            entries.append(fs.build_folder_entry(url))
        elif stat.S_ISREG(attrs.st_mode):
            entries.append(fs.build_file_entry(url))
        # Anything else (neither directory nor regular file) is skipped.
    return entries
def _scan_mlds(self, base_url):
    """Build dir entries from the MLSD listing of the url path.

    Entries whose ``type`` fact is ``dir`` become folder entries; everything
    else becomes a file entry.
    """

    def to_entry(listing):
        # https://docs.python.org/3/library/ftplib.html#ftplib.FTP.mlsd
        name = listing[0]
        kind = listing[1]["type"]
        entry_url = urls.URL(base_url + name)
        if kind == "dir":
            return fs.build_folder_entry(entry_url)
        return fs.build_file_entry(entry_url)

    return [
        to_entry(listing)
        for listing in self.conn.mlsd(self.url.path, facts=["type"])
    ]
def add_credentials_from_reader(
        injector: injection.CredentialsInjector,
        yaml_reader: protocols.Reader) -> injection.CredentialsInjector:
    """Read the credentials from a yml.

    The file has the following format:

        secrets:
            my_creds_name: http://<user>:<password>@<host>/path
            my_db: postgres://<db_user>:<db_password>@<db_host>/database

    Each url found is registered in ``injector``, which is then returned.
    If registering a secret fails the error is logged and re-raised.
    """
    creds = _load_creds_from_yaml(yaml_reader)
    for name, url in creds.items():
        logger.info(f"Adding secret: {name}")
        try:
            injector.register_credentials(urls.URL(url))
        except Exception:
            # Only the secret name is logged: the url carries the password
            # and must not end up in the logs.
            logger.error(
                f"Error while registering credentials {name} from file")
            raise
    return injector
def test_handler_is_registered(url, scheme):
    """Check that the parsed url reports the expected scheme."""
    parsed = urls.URL(url)
    assert parsed.scheme == scheme
class GoogleDriveFSClient(base_client.BaseClient["GoogleDriveFSClient"]):
    """Allow filesystem-like access to google drive.

    Google drive follows a drive oriented architecture more reminiscent of
    windows filesystems than unix approaches. This makes it a bit complicated
    to present the resources as URLs.

    From the user perspective accessing the resources works as follows:

    * urls MUST have an empty hostname `gdrive:///My Drive/` or
      `gdrive:/My Drive/`
    * the first element of the path has to be the drive name, i.e. `My Drive`
      for the default drive or the drive name as it appears in the web ui
      for shared drives.
    """

    DEFAULT_DRIVE_NAME = "My Drive"
    DEFAULT_DRIVE_ID = "root"
    DEFAULT_DRIVE_DESCRIPTOR = _GoogleDriveDescriptor(
        id_=DEFAULT_DRIVE_ID,
        name=DEFAULT_DRIVE_NAME,
        root_descriptor=_GoogleFileDescriptor(
            id_=DEFAULT_DRIVE_ID,
            name=DEFAULT_DRIVE_NAME,
            mime_type=_GoogleFileDescriptor.FOLDER_MIME_TYPE,
            # Interpolated while the class body executes, so only names from
            # the class body are available here (there is no ``self``).
            url=urls.URL(f"gdrive:///{DEFAULT_DRIVE_NAME}"),
            parents=[],
        ),
    )

    allowed_schemes = ["gdrive", "googledrive"]

    drive_name: str
    path_parts: Tuple[str, ...]

    # Not an easy task to figure out the type of the
    # returned value from the library
    _service: Optional[Any] = None

    # Per-instance cache of the drives listing, filled in by _get_drives.
    _drives_cache: Optional[Dict[str, _GoogleDriveDescriptor]] = None

    def __init__(self, url: Union[urls.URL, str]) -> None:
        """Create a new GoogleDriveFSClient.

        The first non-empty element of the url path is the drive name and
        the remaining elements are the path inside that drive.

        Raises:
            ValueError: if the url path has no drive element.
        """
        super().__init__(url)
        parts = [part for part in self.url.path.split("/") if part]
        if not parts:
            raise ValueError(
                f"Bad url: {self.url.path} :Google Drive needs at least "
                "the drive part (i.e. gdrive:///My Drive/)")
        self.drive_name = parts[0]
        self.path_parts = tuple(parts[1:])

    @property
    def _drive(self):
        """Descriptor of the drive named in the url.

        Raises:
            ValueError: if the drive name is not among the known drives.
        """
        drives = self._get_drives()
        if self.drive_name not in drives:
            names = list(drives)
            raise ValueError(f"Drive name (hostname) should be one of {names}")
        return drives[self.drive_name]

    def _connect(self) -> "GoogleDriveFSClient":
        self._refresh_service()
        return self

    def _refresh_service(self, token_file: str = TOKEN_FILE):
        """Build the drive service from the stored credentials if not built yet."""
        if self._service is not None:
            return

        creds = _load_credentials(token_file)
        self._service = build("drive", "v3", credentials=creds)

    def close(self) -> None:
        """Close the dummy connection to google drive."""
        self.closed = True

    # Stream methods:

    def get(self, writer: protocols.ByteWriter, **kwargs) -> None:
        """Get the contents of the google drive file."""
        leaf_descriptor = self._get_leaf_descriptor()
        _DownloadRequest(self._service, leaf_descriptor.id_, writer).execute()

    def put(self, reader: protocols.ByteReader, **kwargs) -> None:
        """Write the contents of the reader to the google drive file."""
        try:
            file_descriptor = self._get_leaf_descriptor()
            self._update(file_descriptor, reader)
        except IOError:
            # file doesn't exist, then create
            self._create(reader)

    def _create(self, reader: protocols.ByteReader):
        """Create a new file.

        Raises:
            IOError: if the parent of the url path is not a folder.
        """
        descriptors = self._get_path_descriptors(ignore_tail=True)
        parent = descriptors[-1]
        if not parent.is_dir:
            raise IOError(f"{self.url} parent path is not a folder")
        uploader = _CreateRequest(service=self._service,
                                  name=self.path_parts[-1],
                                  parent_id=parent.id_,
                                  reader=reader)
        uploader.execute()

    def _update(self, file_descriptor: _GoogleFileDescriptor,
                reader: protocols.ByteReader):
        """Update file contents."""
        uploader = _UpdateRequest(
            service=self._service,
            name=file_descriptor.name,
            file_id=file_descriptor.id_,
            reader=reader,
        )
        uploader.execute()

    # scandir related methods

    @decorators.check_conn
    def scandir(self, **kwargs) -> Iterable[fs.DirEntry]:
        """List contents of a folder from google drive.

        Raises:
            IOError: if the url does not point to a folder.
        """
        leaf_descriptor = self._get_leaf_descriptor()

        if not leaf_descriptor.is_dir:
            raise IOError(f"{self.url} is not a folder")

        url_base = str(self.url).rstrip("/") + "/"
        lister = _ListFilesRequest(self._service,
                                   url_base=url_base,
                                   q=f"'{leaf_descriptor.id_}' in parents")
        return lister.list()

    # remove

    def remove(self):
        """Remove the file from google drive."""
        leaf_descriptor = self._get_leaf_descriptor()
        args = {
            "fileId": leaf_descriptor.id_,
            "supportsTeamDrives": True,
        }
        self._service.files().delete(**args).execute()

    def _get_drives(self) -> Dict[str, _GoogleDriveDescriptor]:
        """Return the available drives keyed by name, default drive included.

        The listing is fetched once and then kept on the instance. An
        ``lru_cache`` on the method would key on ``self``, keeping clients
        alive inside the cache and evicting on every switch of client.
        """
        if self._drives_cache is None:
            drives = {
                d.name: d
                for d in _ListDrivesRequest(self._service).list()
            }
            drives[self.DEFAULT_DRIVE_NAME] = self.DEFAULT_DRIVE_DESCRIPTOR
            self._drives_cache = drives
        return self._drives_cache

    def _get_leaf_descriptor(self) -> _GoogleFileDescriptor:
        """Get the last descriptor from the path part of the url."""
        return self._get_path_descriptors()[-1]

    def _get_path_descriptors(self, ignore_tail=False
                              ) -> List[_GoogleFileDescriptor]:
        """Get the descriptors of the url path, optionally without its last part."""
        parts = self.path_parts
        if ignore_tail:
            parts = parts[:-1]
        return list(self._path_parts_to_descriptors(self._drive, parts))

    def _path_parts_to_descriptors(
            self, drive: _GoogleDriveDescriptor,
            path_parts: Iterable[str]) -> List[_GoogleFileDescriptor]:
        """Convert the path parts into google drive descriptors.

        The first descriptor is always the root descriptor of the drive.
        """
        file_descriptors = [drive.root_descriptor]
        # NOTE(review): the first part is looked up without a parent filter,
        # i.e. by name only -- confirm whether it should be constrained to
        # the root of the drive.
        parent = None
        for path_part in path_parts:
            file_descriptor = self._get_file_descriptor_by_name(
                path_part, parent)
            parent = file_descriptor.id_
            file_descriptors.append(file_descriptor)

        return file_descriptors

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes for a quoted query value."""
        return value.replace("\\", "\\\\").replace("'", "\\'")

    def _get_file_descriptor_by_name(self,
                                     name: str,
                                     parent: Optional[str] = None):
        """Get the file descriptor given the file name and its parent.

        Raises:
            IOError: if no file matches the name (and parent, when given).
        """
        # The name is embedded in a single-quoted literal, so quotes and
        # backslashes in it have to be escaped to keep the query valid.
        args = {"q": f" name = '{self._escape_query_value(name)}'"}
        if parent is not None:
            args["q"] += f" and '{parent}' in parents"

        results = list(_ListFilesRequest(self._service, **args).list())
        if not results:
            raise IOError(
                f"Descriptor not found for {self.url} "
                f"Could not find part {name} with parent id {parent}")

        return results[0]
def test_missing_url(self):
    """Check that building a URL from ``None`` raises ``URLError``."""
    missing = None
    with pytest.raises(urls.URLError):
        urls.URL(missing)
def _build_url(bucket: str, prefix: str) -> urls.URL:
    """Build the s3 url for ``prefix`` inside ``bucket``.

    Trailing slashes are removed from a non-empty prefix.
    """
    prefix = prefix.rstrip("/") if prefix else prefix
    return urls.URL(f"s3://{bucket}/{prefix}")
def test_url_escaped_fields(self, url, username, password):
    """Check the username and password exposed by the parsed url."""
    parsed_url = urls.URL(url)
    actual = (parsed_url.username, parsed_url.password)
    assert actual[0] == username
    assert actual[1] == password
def test_string_hides_password(self):
    """Check the string form of a url that carries credentials."""
    rendered = str(urls.URL("scheme://*****:*****@hostname.com"))
    assert rendered == "scheme://*****:*****@hostname.com"
def test_copy(self, original, components, expected):
    """Check that copying with replaced components yields the expected url."""
    source_url = urls.URL(original)
    copied = source_url.copy(**components)
    assert copied == urls.URL(expected)
def test_url_equality(self, url_1, url_2, should_be_equal):
    """Check that url equality matches the expectation."""
    left = urls.URL(url_1)
    right = urls.URL(url_2)
    are_equal = left == right
    assert are_equal == should_be_equal
def authenticate(url: str) -> urls.URL:
    """Authenticate url.

    The url is parsed and passed through the loaded credentials injector.
    """
    injector = load_credentials_injector()
    return injector.inject(urls.URL(url))
def _build_bucket_entries(self) -> Iterable[fs.DirEntry]:
    """Return a lazy iterable with one directory entry per bucket."""
    # The bucket names are fetched right away; entries are built on iteration.
    bucket_names = self._get_buckets()
    return (
        fs.DirEntry(url=urls.URL("s3://" + name), is_dir=True, is_file=False)
        for name in bucket_names
    )
def _from_os_dir_entry(original: os.DirEntry) -> fs.DirEntry:
    """Convert an ``os.DirEntry`` into an ``fs.DirEntry`` with an absolute file url."""
    absolute_path = os.path.abspath(original.path)
    entry_url = urls.URL("file://" + absolute_path)
    is_dir = bool(original.is_dir())
    is_file = bool(original.is_file())
    return fs.DirEntry(url=entry_url, is_dir=is_dir, is_file=is_file)