def main():
    """Download a list of directories from an OSF project.

    Command-line arguments:
        directories: text file containing the list of directories to
            download.
        destination: root destination directory.
        --project, --username, --password, --token: optional values that
            override the ones read from the osfclient configuration file.

    Values given on the command line take precedence over the
    configuration file; anything not given falls back to the file.
    Exits with a usage error when no project code is available from
    either source.
    """
    parser = argparse.ArgumentParser(
        description="Download data from OSF project")
    parser.add_argument("directories",
                        help="Text file containing list of directories"
                             " to download")
    parser.add_argument("destination", help="Root destination directory")
    parser.add_argument("--project", help="Project code")
    parser.add_argument("--username", help="Username for project")
    parser.add_argument("--password", help="Password for user")
    parser.add_argument("--token", help="Access token")
    args = parser.parse_args()

    # Override file config if provided
    config = osfcli.config_from_file()
    config.update({k: v for k, v in vars(args).items() if v is not None})

    project_code = config.get('project')
    if project_code is None:
        parser.error("no project code given: use --project or set it in "
                     "the configuration file")

    # Read the credentials from the merged config (not from ``args``) so
    # that values stored in the configuration file are honoured too.
    osf = OSF(username=config.get('username'),
              password=config.get('password'),
              token=config.get('token'))
    project = osf.project(project_code)

    dest = args.destination
    to_download = parse_download_list(args.directories)
    for storage in project.storages:
        download_folders(storage, to_download, dest)
def fetch(self, spec, output_dir, yield_output=False):
    """Fetch an RDM directory into ``output_dir``.

    This is a generator: it first yields a progress message and then
    re-yields every line produced by ``self._fetch_storage``.

    ``spec`` is read for the keys ``project_id``, ``path`` and ``host``.
    ``host`` must provide ``api`` (one trailing "/" is dropped) and may
    provide ``token``; without it the ``OSF_TOKEN`` environment variable
    is used.  An empty ``path`` fetches every storage of the project,
    otherwise the first path component names the storage and the rest
    (including its leading "/") is the sub-path inside it.
    ``yield_output`` is accepted but not used in this method.
    """
    project_id = spec["project_id"]
    path = spec["path"]
    host = spec["host"]

    api_url = host["api"]
    if api_url.endswith("/"):
        api_url = api_url[:-1]

    yield "Fetching RDM directory {} on {} at {}.\n".format(
        path, project_id, api_url)

    if "token" in host:
        token = host["token"]
    else:
        token = os.getenv("OSF_TOKEN")
    client = OSF(token=token, base_url=api_url)
    project = client.project(project_id)

    if len(path) == 0:
        # No path given: walk every storage of the project.
        for storage in project.storages:
            yield from self._fetch_storage(storage, output_dir)
        return

    # "<storage>/<sub/path>" -> storage name and "/<sub/path>";
    # a bare storage name maps to the storage root "/".
    storage_name, slash, remainder = path.partition("/")
    subpath = slash + remainder if slash else "/"
    storage = project.storage(storage_name)
    yield from self._fetch_storage(storage, output_dir, subpath)
def upload_to_osf(username, password, local_path):
    """Compress the public and private data and upload them to OSF.

    ``local_path`` must be a directory containing ``public`` and
    ``private`` subdirectories (the split has to be done beforehand).
    For each of them:

    1. every regular file below the subdirectory is added to
       ``<local_path>/<name>.tar.gz``, stored under its path relative to
       that subdirectory;
    2. the archive is uploaded (with ``force=True``) to the
       ``osfstorage`` of the matching project -- PROJECT_CODE_PUBLIC or
       PROJECT_CODE_PRIVATE -- under REMOTE_PATH.

    Parameters:
        username, password: credentials handed to the OSF client.
        local_path: str or path-like, directory holding the data.

    Raises:
        RuntimeError: if ``local_path`` is not a directory or lacks the
            ``public`` / ``private`` subdirectories.
    """
    local_path = Path(local_path)
    if not local_path.is_dir():
        raise RuntimeError(f"Expected source ({local_path}) "
                           "to be a directory")

    # ########################################################
    # TODO: make the split to public and private data directories
    # to have a path:
    # local_path
    # |---public
    # |---private
    # all the data in the public directory will be added to the
    # public repo, and from private directory to the private repo
    #
    # here the split has already been done beforehand
    project_types = ['public', 'private']

    # Validate the layout with a real exception (asserts are stripped
    # under ``python -O``) and before any remote connection is set up.
    missing = [name for name in project_types
               if not (local_path / name).is_dir()]
    if missing:
        raise RuntimeError(
            f"Expected ({local_path}) to contain 'public' and 'private' "
            f"subdirectories, missing: {', '.join(missing)}")

    remote_path = Path(REMOTE_PATH)
    osf = OSF(username=username, password=password)
    project_codes = [PROJECT_CODE_PUBLIC, PROJECT_CODE_PRIVATE]

    for project_code, project_type in zip(project_codes, project_types):
        print(f'compressing {project_type} data')
        used_dir = local_path / project_type
        tar_name = local_path / (project_type + '.tar.gz')

        # add files from the given dir to your archive
        with tarfile.open(tar_name, "w:gz") as tar_handle:
            for next_file in used_dir.rglob('*'):
                if not next_file.is_file():
                    continue
                print(next_file)
                remote_name = next_file.relative_to(used_dir)
                tar_handle.add(next_file, arcname=remote_name)

        print(f'uploading {project_type} data')
        # establish the connection with the correct repo on osf
        project = osf.project(project_code)
        store = project.storage('osfstorage')

        with open(tar_name, 'rb') as fp:
            fname = remote_path / (project_type + '.tar.gz')
            store.create_file(fname, fp, force=True)
            print(f'successfully uploaded {fname} to {REMOTE_PATH}')
def get_connection_info(get_private, username=None, password=None):
    """Return the OSF storage and data configuration for one data set.

    When ``get_private`` is true the OSF client is created with the
    given ``username`` / ``password`` and the ``'private'`` entry of
    RAMP_FOLDER_CONFIGURATION is used.  Otherwise the client is created
    without credentials, the ``'public'`` entry is used, and passing
    credentials trips an assertion.

    Returns the tuple ``(store, data_config)`` where ``store`` is the
    project's ``osfstorage``.  Raises ValueError when OSF reports an
    UnauthorizedException.
    """
    if not get_private:
        assert username is None and password is None, (
            "Username and password should only be provided when fetching "
            "private data.")
        folder_name = 'public'
        osf = OSF()
    else:
        folder_name = 'private'
        osf = OSF(username=username, password=password)

    data_config = RAMP_FOLDER_CONFIGURATION[folder_name]

    try:
        store = osf.project(data_config['code']).storage('osfstorage')
    except UnauthorizedException:
        raise ValueError("Invalid credentials for RAMP private storage.")

    return store, data_config
def download_from_osf(private, username, password):
    """Download the files stored under REMOTE_PATH on OSF into LOCAL_PATH.

    Parameters:
        private: when true, use PROJECT_CODE_PRIVATE and log in with
            ``username`` / ``password``; otherwise use PROJECT_CODE with
            a client created without credentials.
        username, password: credentials, only used when ``private``.

    Every file of the project's ``osfstorage`` whose path contains
    REMOTE_PATH is saved below LOCAL_PATH, keeping the part of its path
    that follows REMOTE_PATH.  Files that already exist locally are
    skipped, not overwritten.  Progress is reported with ``print``.
    """
    if private:
        project_code = PROJECT_CODE_PRIVATE
        osf = OSF(username=username, password=password)
    else:
        project_code = PROJECT_CODE
        osf = OSF()

    project = osf.project(project_code)
    store = project.storage('osfstorage')

    file_idx = 0
    for file_ in store.files:
        # get only those files which are stored in REMOTE_PATH
        pathname = file_.path
        if REMOTE_PATH not in pathname:
            # we are not interested in this file
            continue

        # keep only what follows REMOTE_PATH (do not include project name)
        pathname = pathname[pathname.find(REMOTE_PATH) + len(REMOTE_PATH):]
        # A leading "/" would make os.path.join discard LOCAL_PATH and
        # write to an absolute path, so strip it before joining.
        pathname = pathname.lstrip('/')
        save_file = os.path.join(LOCAL_PATH, pathname)

        if os.path.exists(save_file):
            # do not save it if the file already exists
            print(f'Skipping existing file {save_file}')
            continue

        # create the local directory tree if necessary
        pathfile = os.path.dirname(save_file)
        if pathfile:
            os.makedirs(pathfile, exist_ok=True)

        with open(save_file, "wb") as f:
            file_.write_to(f)
        file_idx += 1

    print(f'saved {file_idx} files to {LOCAL_PATH}')
# in the command line: osf -p t4uf8 clone temp/ # however this corresponds to the whole project. we are interested only in the # stroke data here # this script does the same as (from terminal) # osf upload local_path remote_path LOCAL_PATH = 'data' # local path to the data REMOTE_PATH = 'stroke/' # remote path where to store the data on OSF PROJECT_CODE = 't4uf8' # to find your PROJECT_CODE navigate to your OSF # project on the web. The link will be something of this type: # https://osf.io/t4uf8/ , here t4uf8 is the PROJECT_CODE # if the file already exists it will overwrite it # osf = OSF(username=USERNAME, password=PASSWORD) osf = OSF() project = osf.project(PROJECT_CODE) destination = 'https://osf.io/' + PROJECT_CODE + '/' store = project.storage('osfstorage') def download_from_osf(): file_idx = 0 for file_ in store.files: # get only those files which are stored in REMOTE_PATH pathname = file_.path if REMOTE_PATH not in pathname: # we are not interested in this file continue
from osfclient.api import OSF # this script does the same as (from terminal) # osf -r -p your_password -u your_username upload local_path remote_path LOCAL_PATH = 'data/' # local path to the data REMOTE_PATH = 'stroke' # remote path where to store the data on OSF PROJECT_CODE = 't4uf8' # to find your PROJECT_CODE navigate to your OSF # project on the web. The link will be something of this type: # https://osf.io/t4uf8/ , here t4uf8 is the PROJECT_CODE USERNAME = '******' PASSWORD = '******' # for uploading the data you need to give the username # and the password of one of the project owners # if the file already exists it will overwrite it osf = OSF(username=USERNAME, password=PASSWORD) project = osf.project(PROJECT_CODE) destination = 'https://osf.io/' + PROJECT_CODE + '/' store = project.storage('osfstorage') def upload_recursive_to_osf(): # here we are only using recursive if not os.path.isdir(LOCAL_PATH): raise RuntimeError(f"Expected source ({LOCAL_PATH})" "to be a directory") _, dir_name = os.path.split(LOCAL_PATH) idx = 1