def initremote(self):
    """Handle INITREMOTE: validate configuration and load credentials.

    Requires either 'prefix' or 'root_id' to be configured. The OAuth
    token is read from the path given in the 'token' setting (which is
    cleared immediately so the secret is not persisted in the git-annex
    branch) or, failing that, from a default location under
    .git/annex/othertmp. Raises RemoteError on misconfiguration or when
    no usable credentials can be found.
    """
    # flag consulted elsewhere (e.g. the root-ID-change check) to relax
    # validation while initremote is running
    self.isinitremote = True
    self._send_version()
    prefix = self.annex.getconfig('prefix')
    root_id = self.annex.getconfig('root_id')
    if not prefix and not root_id:
        raise RemoteError("Either prefix or root_id must be given.")
    token_config = self.annex.getconfig('token')
    if token_config:
        # wipe the setting right away so the token path/secret does not
        # end up stored in the git-annex branch
        self.annex.setconfig('token', "")
        token_file = Path(token_config)
    else:
        git_root = Path(self.annex.getgitdir())
        othertmp_dir = git_root / "annex/othertmp"
        othertmp_dir.mkdir(parents=True, exist_ok=True)
        token_file = othertmp_dir / "git-annex-remote-googledrive.token"
    try:
        self.credentials = Credentials.from_authorized_user_file(
            token_file)
    except Exception as e:
        if token_config:
            # an explicitly configured token file must be readable
            raise RemoteError(
                "Could not read token file {}:".format(token_file), e)
        # default location is best-effort only; fall back to whatever
        # credentials the remote already has stored
        self.annex.debug(
            "Error reading token file at {}".format(token_file), e,
            " Trying embedded credentials")
    if not self.credentials:
        raise RemoteError(
            "No Credentials found. Run 'git-annex-remote-googledrive setup' in order to authenticate."
        )
    # remember the resolved folder ID so later sessions can detect a
    # moved/replaced remote folder
    self.annex.setconfig('root_id', self.root.id)
    self.isinitremote = False
def _get_root(self, RootClass, creds, prefix=None, root_id=None): #TODO: Maybe implement as property, too try: if prefix: return RootClass.from_path(creds, prefix, uuid=self.uuid, local_appdir=self.local_appdir) else: return RootClass.from_id(creds, root_id, uuid=self.uuid, local_appdir=self.local_appdir) except JSONDecodeError: raise RemoteError( "Access token invalid, please re-run `git-annex-remote-googledrive setup`" ) except (NotAuthenticatedError, RefreshError): raise RemoteError( "Failed to authenticate with Google. Please run 'git-annex-remote-googledrive setup'." ) except FileNotFoundError: if prefix: raise RemoteError( "Prefix {} does not exist or does not point to a folder.". format(prefix)) else: raise RemoteError( "File ID {} does not exist or does not point to a folder.". format(root_id)) except Exception as e: raise RemoteError( "Failed to connect with Google. Please check your internet connection.", e)
def initremote(self):
    """Handle INITREMOTE: validate configuration, resolve credentials and
    the remote root folder.

    Requires either 'prefix' or 'root_id'. Credentials come from the
    file named by the 'token' setting, from a default location under
    .git/annex/othertmp, or from credentials already embedded in the
    remote. Raises RemoteError on misconfiguration or failure.
    """
    self._send_version()
    prefix = self.annex.getconfig('prefix')
    root_id = self.annex.getconfig('root_id')
    if not prefix and not root_id:
        raise RemoteError("Either prefix or root_id must be given.")
    token_config = self.annex.getconfig('token')
    if token_config:
        token_file = Path(token_config)
    else:
        git_root = Path(self.annex.getgitdir())
        othertmp_dir = git_root / "annex/othertmp"
        othertmp_dir.mkdir(parents=True, exist_ok=True)
        token_file = othertmp_dir / "git-annex-remote-googledrive.token"
    try:
        with token_file.open('r') as fp:
            credentials = fp.read()
    except Exception as e:
        if token_config:
            # an explicitly configured token file must be readable
            raise RemoteError(
                "Could not read token file {}:".format(token_file), e)
        # default location is best-effort; fall back to embedded creds
        self.annex.debug(
            "Error reading token file at {}".format(token_file), e,
            " Trying embedded credentials")
        credentials = None
    if not credentials:
        credentials = self.credentials
    if not credentials:
        raise RemoteError(
            "No Credentials found. Run 'git-annex-remote-googledrive setup' in order to authenticate."
        )
    if self.annex.getconfig('exporttree') == 'yes':
        self.root = self._get_root(ExportRemoteRoot, credentials, prefix,
                                   root_id)
    else:
        try:
            self.root = self._get_root(RemoteRoot, credentials, prefix,
                                       root_id)
        except HasSubdirError:
            # a folder with subdirectories usually means a layout from a
            # different tool (gdrive/rclone) that must be migrated first
            raise RemoteError(
                "Specified folder has subdirectories. Are you sure 'prefix' or 'id' is set correctly? In case you're migrating from gdrive or rclone, run 'git-annex-remote-googledrive migrate {prefix}' first."
                .format(prefix=prefix))
    # clear the token setting so the secret is not persisted in the
    # git-annex branch; store resolved ID and normalized credentials
    self.annex.setconfig('token', "")
    self.annex.setconfig('root_id', self.root.id)
    self.credentials = ''.join(self.root.json_creds().split())
def transfer_store(self, key: str, filename: str):
    """Store *key* (content at *filename*) into its zip archive on the remote.

    No-op if the key is already present with a matching size. On a
    post-write size mismatch, the (corrupt) entry is deleted again and a
    RemoteError is raised so git-annex knows the transfer failed.

    e.g.: filename=".git/annex/objects/qW/pV/SHA256E-s148273064--5880ac1cd05eee90db251c027771b4c9f0a55b7c8b2813c95eff59eef465ebd3.wav/SHA256E-s148273064--5880ac1cd05eee90db251c027771b4c9f0a55b7c8b2813c95eff59eef465ebd3.wav"
    """
    file_path = Path(filename)
    # already stored with the expected size -> nothing to do
    if self.check_file_sizes(key, file_path):
        return
    zip_path = self._get_zip_path(key)
    zinfo = ZipInfo.from_file(file_path, arcname=key,
                              strict_timestamps=True)
    zinfo.compress_type = self.compression_algorithm
    # TODO: create inconsistent state context manager to avoid partial/corrupt
    # transfers when user KeyboardInterrupts during a copyfileobj call
    # a lockfile perhaps?
    with ZipFile(zip_path, 'a', compression=self.compression_algorithm,
                 allowZip64=True) as myzip:
        with open(file_path, "rb") as src, myzip.open(zinfo, 'w') as dest:
            # NOTE(review): this copyfileobj takes callback/file_size
            # kwargs, so it appears to be a project helper (progress
            # reporting), not shutil.copyfileobj — confirm
            copyfileobj(src, dest, callback=self.annex.progress,
                        file_size=file_path.stat().st_size)
    if not self.check_file_sizes(key, file_path):
        # stored entry doesn't match the source -> roll back
        print("Unknown error while storing the key.")
        print("Attempting to delete corrupt key from remote...")
        delete_from_zip(zip_path, key)
        print("Corrupted key was successfully deleted.")
        msg = "Could not store this key. drop it --from this-remote and retry."
        raise RemoteError(msg)
def remove(self, key: str):
    """Delete *key* from its zip archive on the remote.

    Removing an absent key is a no-op; raises RemoteError when the key
    is still present after the deletion attempt.
    """
    if not self.checkpresent(key):
        # git-annex does not treat removing a missing key as an error
        return
    archive = self._get_zip_path(key)
    delete_from_zip(archive, key)
    # verify that the deletion actually took effect
    if self.checkpresent(key):
        raise RemoteError("Could not remove.")
def address_length(self) -> int:
    """Return the configured address_length (default 1; only 1 or 2 allowed)."""
    raw = self.annex.getconfig("address_length")
    # an unset config value comes back as the empty string
    value = 1 if raw == "" else int(raw)
    if value <= 0 or value >= 3:
        raise RemoteError("address_length value should be > 0 and < 3.")
    return value
def delete_from_zip(zip_path: Path, file_to_delete: Path):
    """Delete *file_to_delete* from the archive *zip_path* via the external
    ``zip`` utility, logging the tool's stdout/stderr.

    Raises RemoteError when ``zip`` exits non-zero.
    """
    # Build the argument vector directly instead of shlex-splitting an
    # f-string: a path containing whitespace or quote characters would
    # otherwise be split into several bogus arguments.
    args = ["zip", "--delete", str(zip_path), str(file_to_delete)]
    proc: CompletedProcess = run(args, capture_output=True, text=True)
    log_stuff(zip_path.parent / LOGFOLDER / f"{zip_path.stem}.log",
              [proc.stdout, proc.stderr])
    if proc.returncode != 0:
        raise RemoteError(
            f"Could not delete {file_to_delete!r} from {zip_path.name!r}.")
def prepare(self):
    """Handle PREPARE: resolve the OSF node and its storage backend."""
    node_id = self.annex.getconfig('node')
    if not node_id:
        # fall back on outdated 'project' parameter, which could be
        # just the node ID or a full URL to a project
        node_id = posixpath.basename(
            urlparse(self.annex.getconfig('project')).path.strip(
                posixpath.sep))
    if not node_id:
        raise RemoteError('Could not determine OSF node ID')
    try:
        # make use of DataLad's credential manager for a more convenient
        # out-of-the-box behavior
        from datalad_osf.utils import get_credentials
        # we must stay non-interactive, because this is running inside
        # git-annex's special remote protocol
        creds = get_credentials(allow_interactive=False)
    except ImportError as e:
        # whenever anything goes wrong here, stay calm and fall back
        # on envvars.
        # we want this special remote to be fully functional without
        # datalad
        creds = dict(
            username=os.environ.get('OSF_USERNAME', None),
            password=os.environ.get('OSF_PASSWORD', None),
            token=os.environ.get('OSF_TOKEN', None),
        )
    # next one just sets up the stage, no requests performed yet, hence
    # no error checking needed
    # supply both auth credentials, so osfclient can fall back on user/pass
    # if needed
    osf = OSF(**creds)
    # next one performs initial auth
    try:
        self.node = osf.project(node_id)
    except Exception as e:
        # we need to raise RemoteError() such that PREPARE-FAILURE
        # is reported, sadly that doesn't give users any clue
        # TODO support datalad logging here
        raise RemoteError('Failed to obtain OSF node handle: {}'.format(e))
    # which storage to use, defaults to 'osfstorage'
    # TODO a node could have more than one? Make parameter to select?
    self.storage = self.node.storage()
def transfer_retrieve(self, key, filename):
    """Get a key from OSF and store it to `filename`"""
    try:
        # a cached None means "key known, handle never fetched" ->
        # drop the cache and query OSF again
        handle = self.files[key]
        if handle is None:
            self._files = None
            handle = self.files[key]
        with open(filename, 'wb') as out:
            handle.write_to(out)
    except Exception as exc:
        # e.g. if the file couldn't be retrieved
        if isinstance(exc, UnauthorizedException):
            # UnauthorizedException doesn't give a meaningful str()
            raise RemoteError('Unauthorized access')
        raise RemoteError(exc)
def _new_remote_file(self, key) -> DriveFile:
    """Create a new remote file object for *key*.

    When no folder with free capacity exists, either auto-fix the full
    root (if enabled and possible) or raise RemoteError with pointers to
    the manual fix instructions.
    """
    if self.current_folder is not None:
        return self.current_folder.new_file(key)
    # no folder with free capacity left
    if self.annex.getconfig("auto_fix_full") != "yes":
        raise RemoteError(
            "Remote folder is full (max. 500.000 files exceeded). Cannot upload key."
            " Invoke `enableremote` with `auto_fix_full=yes`"
            " or consult https://github.com/Lykos153/git-annex-remote-googledrive#fix-full-folder"
            " for instructions to do it manually.")
    if self.creator == "from_id":
        raise RemoteError(
            "Remote folder full."
            " Can't fix automatically, because folder is specified by id."
            " Please consult https://github.com/Lykos153/git-annex-remote-googledrive#fix-full-folder"
            " for instructions to do it manually.")
    self._auto_fix_full()
    return self.current_folder.new_file(key)
def _auto_fix_full(self): self.annex.info("Remote folder full. Fixing...") original_prefix = self.folder.name new_root = None try: self.annex.info("Creating new root folder") new_root = self.folder.parent.mkdir(self.folder.name + ".new") self.annex.info("Created as {}({})".format(new_root.name, new_root.id)) except: raise RemoteError( "Couldn't create new folder in {parent_name} ({parent_id})" " Nothing was changed." " Please consult https://github.com/Lykos153/git-annex-remote-googledrive#fix-full-folder" " for instructions to fix it manually.".format( parent_name=self.folder.parent.name, parent_id=self.folder.parent.id)) try: new_name = original_prefix + ".old" self.annex.info( "Moving old root to new one, renaming to {}".format(new_name)) self.folder.move(new_root, new_name=new_name) except: # new_root.rmdir() raise RemoteError( "Couldn't move the root folder." " Nothing was changed." " Please consult https://github.com/Lykos153/git-annex-remote-googledrive#fix-full-folder" " for instructions to fix it manually.") try: self.annex.info("Renaming new root to original prefix: {}".format( original_prefix)) new_root.rename(original_prefix) except: raise RemoteError( "Couldn't rename new folder to prefix." " Please manually rename {new_name} ({new_id}) to {prefix}.". format(new_name=new_root.name, new_id=new_root.id, prefix=original_prefix)) self.annex.info("Success") self.folder = new_root
def transfer_store(self, key, filename):
    """Upload the local file *filename* to OSF storage under the name *key*."""
    try:
        with open(filename, 'rb') as src:
            self.storage.create_file(key, src, force=True, update=True)
    except Exception as exc:
        raise RemoteError(exc)
    # we need to register the idea that this key is now present, but
    # we also want to avoid (re)requesting file info
    if self._files is not None:
        # assign None to indicate that we know this key, but
        # have no info from OSF about it
        self._files[key] = None
def get(self, src, dst, progress_cb):
    """Stream the annex object at *src* on the remote into local file *dst*.

    Uses the persistent blocking shell ('cat' over stdin/stdout) when
    the object's size is known, reporting progress via *progress_cb*;
    falls back to SCP when the size cannot be determined.
    """
    # Note, that as we are in blocking mode, we can't easily fail on the
    # actual get (that is 'cat').
    # Therefore check beforehand.
    if not self.exists(src):
        raise RIARemoteError("annex object {src} does not exist."
                             "".format(src=src))
    # TODO: see get_from_archive()
    # TODO: Currently we will hang forever if the file isn't readable and
    #       it's supposed size is bigger than whatever cat spits out on
    #       stdout. This is because we don't notice that cat has exited
    #       non-zero. We could have end marker on stderr instead, but then
    #       we need to empty stderr beforehand to not act upon output from
    #       earlier calls. This is a problem with blocking reading, since we
    #       need to make sure there's actually something to read in any
    #       case.
    cmd = 'cat {}'.format(sh_quote(str(src)))
    self.shell.stdin.write(cmd.encode())
    self.shell.stdin.write(b"\n")
    self.shell.stdin.flush()
    from os.path import basename
    key = basename(str(src))
    try:
        size = self._get_download_size_from_key(key)
    except RemoteError as e:
        raise RemoteError("src: {}".format(str(src)) + str(e))
    if size is None:
        # rely on SCP for now
        self.ssh.get(str(src), str(dst))
        return
    with open(dst, 'wb') as target_file:
        bytes_received = 0
        # read until the expected number of bytes has arrived; the byte
        # count is the only end-of-object signal in blocking mode
        while bytes_received < size:
            # TODO: some additional abortion criteria? check stderr in
            # addition?
            c = self.shell.stdout.read1(self.buffer_size)
            # no idea yet, whether or not there's sth to gain by a
            # sophisticated determination of how many bytes to read at once
            # (like size - bytes_received)
            if c:
                bytes_received += len(c)
                target_file.write(c)
                progress_cb(bytes_received)
def remove(self, key):
    """Remove a key from the remote.

    Removing a key that is not present is not considered an error. A
    cached value of None means the key is known but its file handle was
    never fetched from OSF; in that case the cache is invalidated and
    the handle re-queried instead of skipping the removal.
    """
    if key not in self.files:
        # removing a not existing key isn't considered an error
        return
    f = self.files[key]
    try:
        if f is None:
            # known key without a handle -> drop the cache and re-fetch
            # (the previous `files.get(key) is None -> return` treated
            # this case as "absent" and silently skipped removal)
            self._files = None
            f = self.files[key]
        f.remove()
    except Exception as e:
        raise RemoteError(e)
    # anticipate change in remote and discard obj
    del self.files[key]
def checkpresent(self, key):
    "Report whether the OSF project has a particular key"
    try:
        if key not in self.files:
            # we don't know this key at all
            return False
        fobj = self.files.get(key, None)
        if fobj is None:
            # we knew the key, but never checked with OSF if it really
            # has it -> trigger request
            self._files = None
        # always answer with a boolean: falling off the end of the
        # function would hand git-annex an invalid (None) reply for
        # keys whose handle is already cached
        return key in self.files
    except Exception as e:
        # e.g. if the presence of the key couldn't be determined, eg. in
        # case of connection error
        raise RemoteError(e)
def wrapper(self, *args, **kwargs):
    """Decorator body: lazily resolve ``self.root`` before invoking *f*.

    NOTE(review): ``f`` and ``root_class`` are free variables from an
    enclosing decorator scope that is not visible in this chunk.
    """
    if not hasattr(self, 'root') or self.root is None:
        prefix = self.annex.getconfig('prefix')
        root_id = self.annex.getconfig('root_id')
        root = self._get_root(root_class, self.credentials, prefix, root_id)
        # a changed folder ID means the configured remote folder was
        # replaced or moved -> refuse to operate on the wrong folder
        if root.id != root_id:
            raise RemoteError(
                "ID of root folder changed. Was the repo moved? Please check remote and re-run git annex enableremote"
            )
        # normalize the credentials JSON (strip all whitespace) before caching
        self.credentials = ''.join(root.json_creds().split())
        self.root = root
    return f(self, *args, **kwargs)
def removeexportdirectory(self, remote_directory):
    """Remove the directory `remote_directory` from the remote"""
    try:
        # match on the folder's path; the previous comprehension only
        # tested the truthiness of the formatted string (never used `f`),
        # so *every* folder matched
        folder = [
            f for f in self.storage.folders
            if f.path == '{sep}{path}{sep}'.format(
                sep=posixpath.sep, path=remote_directory)]
        if not folder:
            # note that removing a not existing directory isn't
            # considered an error
            return
        elif len(folder) > 1:
            raise RuntimeError("More than matching folder found")
        folder = folder[0]
        # osfclient has no way to do this with the public API
        # going through the backdoor...
        folder._delete(folder._delete_url)
        # TODO delete all matching records from self._files
    except Exception as e:
        raise RemoteError(e)
def renameexport(self, key, filename, new_filename):
    """Move the remote file in `name` to `new_name`"""
    try:
        handle = self.files[filename]
        if handle is None:
            # we have no info about this particular key -> trigger request
            self._files = None
            handle = self.files[filename]
        # osfclient exposes no public move/rename; use the WaterButler
        # move endpoint directly
        payload = dict(action='move',
                       path='/{}'.format(dirname(new_filename)),
                       rename=basename(new_filename))
        response = self.storage.session.post(
            handle._move_url,
            data=json.dumps(payload),
        )
        if response.status_code != 201:
            raise RuntimeError('{}: {}'.format(response, response.text))
        # update the local cache: old name gone, new name known-but-unfetched
        del self._files[filename]
        self._files[new_filename] = None
    except Exception as exc:
        raise RemoteError(repr(exc))
def transfer_store(self, key, filename):
    """Store the local file *filename* under *key* in the remote object tree.

    Uploads to a per-uuid temporary location first and atomically renames
    into place, so CHECKPRESENT cannot see a half-written key and parallel
    uploads from other remotes don't collide. Raises RemoteError/
    RIARemoteError on read-only remotes or upload conflicts.
    """
    if self.read_only:
        raise RemoteError(
            "Remote was set to read-only. "
            "Configure 'ria-remote.<name>.force-write' to overrule this.")
    dsobj_dir, archive_path, key_path = self._get_obj_location(key)
    key_path = dsobj_dir / key_path
    if self.io.exists(key_path):
        # if the key is here, we trust that the content is in sync
        # with the key
        return
    self.io.mkdir(key_path.parent)
    # we need to copy to a temp location to let
    # checkpresent fail while the transfer is still in progress
    # and furthermore not interfere with administrative tasks in annex/objects
    # In addition include uuid, to not interfere with parallel uploads from different remotes
    transfer_dir = self.remote_git_dir / "ria-remote-{}".format(
        self.uuid) / "transfer"
    self.io.mkdir(transfer_dir)
    tmp_path = transfer_dir / key
    if tmp_path.exists():
        # Just in case - some parallel job could already be writing to it
        # at least tell the conclusion, not just some obscure permission error
        raise RIARemoteError(
            '{}: upload already in progress'.format(filename))
    try:
        self.io.put(filename, tmp_path)
        # copy done, atomic rename to actual target
        self.io.rename(tmp_path, key_path)
    except Exception as e:
        # whatever went wrong, we don't want to leave the transfer location blocked
        self.io.remove(tmp_path)
        raise e
def _mkdir(directory: Path): try: directory.mkdir(parents=True, exist_ok=True) except OSError as e: raise RemoteError(f"Failed to write to {str(directory)!r}; {e!r}.")
def root(self):
    """Lazily resolve and cache the remote root folder object.

    Picks the root class from 'exporttree' / the configured layout,
    instantiates it from either 'prefix' or 'root_id', translates
    low-level failures into RemoteError, and refuses to operate if the
    folder's ID changed outside of initremote (moved/replaced repo).
    """
    if not hasattr(self, '_root') or self._root is None:  # pylint: disable=access-member-before-definition
        prefix = self.annex.getconfig('prefix')
        root_id = self.annex.getconfig('root_id')
        exporttree = self.annex.getconfig('exporttree')
        if exporttree == "yes":
            root_class = ExportRemoteRoot
        else:
            # map the configured gdrive_layout to its implementation
            layout_mapping = {
                'nodir': NodirRemoteRoot,
                'nested': NestedRemoteRoot,
                'lower': LowerRemoteRoot,
                #'directory': DirectoryRemoteRoot,
                'mixed': MixedRemoteRoot,
            }
            root_class = layout_mapping.get(self.layout, None)
            if root_class is None:
                raise RemoteError(
                    "`gdrive_layout` must be one of {}".format(
                        list(layout_mapping.keys())))
        if self.credentials is None:
            raise RemoteError(
                "Stored credentials are invalid. Please re-run `git-annex-remote-googledrive setup` and `git annex enableremote <remotename>`"
            )
        try:
            if prefix:
                root = root_class.from_path(self.credentials, prefix,
                                            annex=self.annex,
                                            uuid=self.uuid,
                                            local_appdir=self.local_appdir)
            else:
                root = root_class.from_id(self.credentials, root_id,
                                          annex=self.annex,
                                          uuid=self.uuid,
                                          local_appdir=self.local_appdir)
        except JSONDecodeError:
            raise RemoteError(
                "Access token invalid, please re-run `git-annex-remote-googledrive setup`"
            )
        except (NotAuthenticatedError, RefreshError):
            raise RemoteError(
                "Failed to authenticate with Google. Please run 'git-annex-remote-googledrive setup'."
            )
        except FileNotFoundError:
            if prefix:
                raise RemoteError(
                    "Prefix {} does not exist or does not point to a folder."
                    .format(prefix))
            else:
                raise RemoteError(
                    "File ID {} does not exist or does not point to a folder."
                    .format(root_id))
        # during initremote the stored root_id is still being established,
        # so only enforce the ID check outside of it
        if root.id != root_id and not (hasattr(self, 'isinitremote')
                                       and self.isinitremote is True):
            raise RemoteError(
                "ID of root folder changed. Was the repo moved? Please check remote and re-run git annex enableremote"
            )
        self.credentials = root.creds()
        self._root = root
    return self._root
def removeexportdirectory(self, directory):
    """Delete *directory* on the remote; refuse when it is actually a file."""
    try:
        self.root.delete_dir(directory)
    except NotADirectoryError:
        raise RemoteError("{} is a file. Not deleting".format(directory))
def prepare(self):
    """Handle PREPARE: check the target directory exists and pick the
    compression algorithm for this session."""
    target = self.directory
    if not target.is_dir():
        raise RemoteError(f"{str(target)!r} not found.")
    # resolve the configured compression name to its zipfile constant
    self.compression_algorithm = self.compression_algos[self.compression]
def handle_full_folder(self, key=None):
    """Abort with instructions: this layout cannot cope with a full root."""
    # 500.000 files is the per-folder limit enforced by Google Drive
    raise RemoteError(
        "Remote root folder {} is full (max. 500.000 files exceeded)."
        " Please switch to a different layout and consult"
        " https://github.com/Lykos153/git-annex-remote-googledrive#fix-full-folder.".format(
            self.folder.name))
def remove(self, key):
    """Always refuse: a URL-backed remote cannot delete upstream content."""
    raise RemoteError("Removal of content from urls is not possible")
def directory(self) -> Path:
    """Absolute path of the target directory (from the 'directory' setting)."""
    configured = self.annex.getconfig("directory")
    if not configured:
        raise RemoteError("You need to set directory=")
    return Path(configured).resolve()