def upload(args):
    """Upload each local file or directory tree in ``args.local_path``.

    Directories are walked recursively by ``_upload_folder``, which skips
    anything already present on the remote.
    """
    base_remote_path, path_dict = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Make sure the destination vault exists and is accessible.
    target_vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # Unless uploading into the vault root, the destination must already
    # exist on the remote and be a folder.
    is_vault_root = path_dict['path'] == '/'
    if not is_vault_root:
        Object.get_by_full_path(base_remote_path, assert_type='folder')

    for raw_path in args.local_path:
        cleaned = raw_path.rstrip('/')
        name = os.path.basename(cleaned)

        if not os.path.isdir(cleaned):
            Object.upload_file(cleaned, path_dict['path'],
                               target_vault.full_path)
        else:
            _upload_folder(path_dict['domain'], target_vault,
                           base_remote_path, cleaned, name)
def get_by_full_path(cls, full_path, **kwargs):
    """Retrieve a Dataset by its vault full path.

    Resolves ``full_path`` to a dataset-typed object via the Object API,
    then fetches the underlying Dataset record by its id.
    """
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    dataset_obj = Object.get_by_full_path(
        full_path, assert_type='dataset', client=_client)
    return Dataset.retrieve(
        dataset_obj['dataset_id'], client=_client, **kwargs)
def _get_parent_folder(self, path):
    """Return the folder object located at ``path`` inside this vault."""
    from solvebio import Object

    folder_full_path = ':'.join([self.full_path, path])
    return Object.get_by_full_path(
        folder_full_path,
        assert_type='folder',
        client=self._client,
    )
def get_or_create_uploads_path(cls, **kwargs):
    """Return the path of the 'Uploads' folder in the personal vault,
    creating the folder first if it does not yet exist."""
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    vault = cls.get_personal_vault(client=_client)
    default_path = 'Uploads'
    full_path = '{0}:/{1}'.format(vault.full_path, default_path)

    try:
        upload_dir = Object.get_by_full_path(
            full_path, assert_type='folder', client=_client)
    except NotFoundError:
        print(
            "Uploads directory not found. Creating {0}".format(full_path))
        upload_dir = Object.create(
            vault_id=vault.id,
            object_type='folder',
            filename=default_path,
            client=_client,
        )

    return upload_dir.path
def get_or_create_uploads_path(cls, **kwargs):
    """Fetch (or lazily create) the 'Uploads' folder of the personal
    vault and return its path."""
    from solvebio import Object

    api_client = kwargs.pop('client', None) or cls._client or client
    personal = cls.get_personal_vault(client=api_client)
    folder_name = 'Uploads'
    target = '{0}:/{1}'.format(personal.full_path, folder_name)

    try:
        folder = Object.get_by_full_path(
            target, assert_type='folder', client=api_client)
    except NotFoundError:
        print("Uploads directory not found. Creating {0}".format(target))
        folder = Object.create(vault_id=personal.id,
                               object_type='folder',
                               filename=folder_name,
                               client=api_client)

    return folder.path
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file to ``remote_path`` inside a vault.

    Skips empty files and files whose md5 already matches the remote
    copy. Creates the Object record first, then PUTs the bytes to the
    returned upload URL with a retrying HTTPS session.

    :param local_path: path of the local file (``~`` is expanded)
    :param remote_path: destination folder path within the vault
    :param vault_full_path: full path identifying the target vault
    :returns: the created Object, the pre-existing Object when md5
        matches, or None for empty files
    :raises FileUploadError: when the PUT does not return HTTP 200
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    local_md5, _ = md5sum(local_path, multipart_threshold=None)
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Check if object exists already and compare md5sums
    full_path, path_dict = Object.validate_full_path(os.path.join(
        '{}:{}'.format(vault.full_path, remote_path),
        os.path.basename(local_path)), client=_client)
    try:
        obj = cls.get_by_full_path(full_path, client=_client)
        if not obj.is_file:
            print('WARNING: A {} currently exists at {}'.format(
                obj.object_type, full_path))
        else:
            # Check against md5sum of remote file
            if obj.md5 == local_md5:
                print('WARNING: File {} (md5sum {}) already exists, '
                      'not uploading'.format(full_path, local_md5))
                return obj
            else:
                print('WARNING: File {} exists on SolveBio with different '
                      'md5sum (local: {} vs remote: {}) Uploading anyway, '
                      'but not overwriting.'.format(
                          full_path, local_md5, obj.md5))
    except NotFoundError:
        pass

    # Lookup parent object
    if path_dict['parent_path'] == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(path_dict['parent_full_path'],
                                             assert_type='folder',
                                             client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get('description')

    # Create the file, and upload it to the Upload URL
    obj = Object.create(vault_id=vault.id,
                        parent_object_id=parent_object_id,
                        object_type='file',
                        filename=os.path.basename(local_path),
                        md5=local_md5,
                        mimetype=mimetype,
                        size=size,
                        description=description,
                        tags=kwargs.get('tags', []) or [],
                        client=_client)
    print('Notice: File created for {0} at {1}'.format(
        local_path, obj.path))
    print('Notice: Upload initialized')
    upload_url = obj.upload_url
    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(local_md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    # Use a session with a retry policy to handle connection errors.
    session = requests.Session()
    max_retries = 5
    retry = Retry(
        total=max_retries,
        read=max_retries,
        connect=max_retries,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504, 400),
    )
    session.mount('https://',
                  requests.adapters.HTTPAdapter(max_retries=retry))

    # FIX: open the file in a context manager so the handle is always
    # closed; the original passed an open() result directly to put()
    # and leaked the file object on both success and failure.
    with open(local_path, 'rb') as fileobj:
        upload_resp = session.put(upload_url, data=fileobj,
                                  headers=headers)

    if upload_resp.status_code != 200:
        print('WARNING: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code))
        # Clean up the failed upload
        obj.delete(force=True)
        raise FileUploadError(upload_resp.content)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(
            local_path, obj.path))

    return obj
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start, exclude_paths=None, dry_run=False):
    """Mirror a local directory tree into the vault under
    ``base_remote_path``.

    Walks ``base_local_path`` creating missing remote folders and
    uploading files, honoring ``exclude_paths`` and ``dry_run``.
    """
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)
        base_folder_path = os.path.join(base_remote_path, local_start)
        if dry_run:
            print('[Dry Run] Creating folder {}'.format(base_folder_path))
        else:
            _create_folder(vault, base_folder_path)

    # FIX: escape the base directory before embedding it in a regex.
    # Regex metacharacters ('+', '(', '[' ...) in a real path would
    # otherwise corrupt the pattern or raise re.error.
    strip_base = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    # Create folders and upload files
    for abs_local_parent_path, folders, files in os.walk(base_local_path):
        # Strips off the local path and adds the parent directory at
        # each phase of the loop
        local_parent_path = strip_base.sub(
            '', abs_local_parent_path).lstrip('/')

        if should_exclude(abs_local_parent_path, exclude_paths,
                          dry_run=dry_run):
            continue

        remote_folder_full_path = \
            os.path.join(base_remote_path, local_parent_path)

        # Create folders
        for folder in folders:
            new_folder_path = os.path.join(abs_local_parent_path, folder)
            if should_exclude(new_folder_path, exclude_paths,
                              dry_run=dry_run):
                continue

            remote_path = os.path.join(remote_folder_full_path, folder)
            if dry_run:
                print('[Dry Run] Creating folder {}'.format(remote_path))
            else:
                _create_folder(vault, remote_path)

        # Upload the files that do not yet exist on the remote
        for f in files:
            local_file_path = os.path.join(abs_local_parent_path, f)
            if should_exclude(local_file_path, exclude_paths,
                              dry_run=dry_run):
                continue

            if dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_file_path, remote_folder_full_path))
            else:
                remote_parent = Object.get_by_full_path(
                    remote_folder_full_path, assert_type='folder')
                Object.upload_file(local_file_path, remote_parent.path,
                                   vault.full_path)
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

    * create_dataset and its args:
        * capacity
        * template_id
        * template_file
        * tag
        * metadata
        * metadata_json_file
        * create_vault
    * full_path
    * commit_mode
    * remote_source
    * dry_run
    * follow
    * file (list)
    """
    if args.dry_run:
        print("NOTE: Running import command in dry run mode")

    full_path, path_dict = Object.validate_full_path(args.full_path)

    # Collect the files to import: remote vault objects when
    # --remote-source is set, otherwise the local paths as given.
    files_list = []
    if args.remote_source:
        # Validate files: each argument is treated as a glob against
        # remote object paths.
        for file_fp in args.file:
            files_ = list(Object.all(glob=file_fp, limit=1000))
            if not files_:
                print("Did not find any {}files at path {}".format(
                    'remote ' if args.remote_source else '', file_fp))
            else:
                for file_ in files_:
                    print("Found file: {}".format(file_.full_path))
                    files_list.append(file_)
    else:
        # Local files
        # Note: if these are globs or folders, then this will
        # create a multi-file manifest which is deprecated
        # and should be updated to one file per import.
        files_list = [fp for fp in args.file]

    if not files_list:
        print("Exiting. No files were found at the following {}paths: {}"
              .format('remote ' if args.remote_source else '',
                      ', '.join(args.file)))
        sys.exit(1)

    # Resolve the dataset template: by id, from a file, or none.
    if args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            # Only a 404 means "no such template"; re-raise anything else.
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file,
                                              args.dry_run)
    else:
        template = None

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args, template=template)
    else:
        try:
            dataset = Object.get_by_full_path(full_path,
                                              assert_type='dataset')
        except solvebio.errors.NotFoundError:
            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Dry run stops here: report what would be imported and exit.
    if args.dry_run:
        print("Importing the following files/paths into dataset: {}"
              .format(full_path))
        for file_ in files_list:
            if args.remote_source:
                print(file_.full_path)
            else:
                print(file_)
        return

    # Generate a manifest from the local files
    # (remote sources are referenced by object_id instead).
    imports = []
    for file_ in files_list:
        if args.remote_source:
            kwargs = dict(object_id=file_.id)
        else:
            manifest = solvebio.Manifest()
            manifest.add(file_)
            kwargs = dict(manifest=manifest.manifest)

        # Add template params
        if template:
            kwargs.update(template.import_params)

        # Create the import
        import_ = DatasetImport.create(
            dataset_id=dataset.id,
            commit_mode=args.commit_mode,
            **kwargs
        )
        imports.append(import_)

    # Either tail the dataset's activity stream or print a link to it.
    if args.follow:
        dataset.activity(follow=True)
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))

    return imports, dataset
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.

    Supports --create-full-path (create missing destination folders),
    --exclude paths, and --dry-run.
    """
    base_remote_path, path_dict = Object.validate_full_path(args.full_path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        try:
            Object.get_by_full_path(base_remote_path, assert_type='folder')
        # FIX: catch the specific lookup failure instead of a bare
        # ``except:``, which also swallowed KeyboardInterrupt/SystemExit.
        except NotFoundError:
            if not args.create_full_path:
                raise
            if args.dry_run:
                print('[Dry Run] Creating {}'.format(base_remote_path))
            else:
                # Create the destination path (including subfolders)
                # if not found
                parent_folder_path = vault.full_path + ':'
                folders = path_dict['path'].lstrip('/').split('/')
                for folder in folders:
                    folder_full_path = os.path.join(parent_folder_path,
                                                    folder)
                    parent_folder = _create_folder(vault, folder_full_path)
                    parent_folder_path = parent_folder.full_path

    # Exit if there are multiple local paths and the
    # exclude paths are not absolute
    base_exclude_paths = args.exclude or []
    if base_exclude_paths and len(args.local_path) > 1:
        rel_exclude_paths = [p for p in base_exclude_paths
                             if not os.path.isabs(p)]
        local_path_parents = set([os.path.dirname(os.path.abspath(p))
                                  for p in args.local_path])
        if rel_exclude_paths and len(local_path_parents) > 1:
            sys.exit('Exiting. Cannot apply the --exclude relative paths '
                     'when multiple upload paths with different parent '
                     'directories are specified. Make --exclude paths '
                     'absolute or run upload paths one at a time.')

    for local_path in args.local_path:
        # Expand local path and strip trailing slash
        local_path = os.path.abspath(local_path).rstrip('/')
        local_name = os.path.basename(local_path)

        # add basepath to excludes
        exclude_paths = [
            os.path.join(local_path, os.path.normpath(exclude_path))
            for exclude_path in base_exclude_paths
        ]

        if os.path.isdir(local_path):
            _upload_folder(path_dict['domain'], vault, base_remote_path,
                           local_path, local_name,
                           exclude_paths=exclude_paths,
                           dry_run=args.dry_run)
        else:
            if args.dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_path, path_dict['path']))
            else:
                Object.upload_file(local_path, path_dict['path'],
                                   vault.full_path)
def create_dataset(args, template=None):
    """
    Attempt to create a new dataset given the following params:

    * template_id
    * template_file
    * capacity
    * tag
    * metadata
    * metadata_json_file
    * create_vault
    * full_path
    * dry_run

    :param template: a pre-validated template (from import_file);
        when given, template_id/template_file args are ignored
    :returns: the created Dataset, or None in dry-run mode
    """
    if args.dry_run:
        print("NOTE: Running create-dataset command in dry run mode")

    full_path, path_dict = Object.validate_full_path(args.full_path)

    try:
        # Fail if a dataset already exists.
        Object.get_by_full_path(full_path, assert_type='dataset')
        print('A dataset already exists at path: {0}'.format(full_path))
        sys.exit(1)
    except NotFoundError:
        pass

    # Accept a template_id or a template_file
    if template:
        # Template has already been validated/created
        # in the import command that called this
        pass
    elif args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file,
                                              args.dry_run)
    else:
        template = None

    if template:
        print("Creating new dataset {0} using the template '{1}'."
              .format(full_path, template.name))
        fields = template.fields
        description = 'Created with dataset template: {0}' \
            .format(str(template.id))
    else:
        fields = []
        description = None

    # Create dataset metadata
    # Looks at --metadata_json_file first and will update
    # that with any other key/value pairs passed in to --metadata
    metadata = {}
    if args.metadata and args.metadata_json_file:
        print('WARNING: Received --metadata and --metadata-json-file. '
              'Will update the JSON file values with the --metadata values')

    if args.metadata_json_file:
        with open(args.metadata_json_file, 'r') as fp:
            try:
                metadata = json.load(fp)
            # FIX: catch ValueError (base of json.JSONDecodeError) instead
            # of a bare ``except:`` that also swallowed KeyboardInterrupt
            # and SystemExit.
            except ValueError:
                print('Metadata JSON file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.metadata_json_file))
                sys.exit(1)

    if args.metadata:
        metadata.update(args.metadata)

    if args.dry_run:
        print("Creating new '{}' capacity dataset at {}"
              .format(args.capacity, full_path))
        if description:
            print("Description: {}".format(description))
        if fields:
            print("Fields: {}".format(fields))
        if args.tag:
            print("Tags: {}".format(args.tag))
        if metadata:
            print("Metadata: {}".format(metadata))
        return

    return Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        fields=fields,
        description=description,
        tags=args.tag or [],
        metadata=metadata,
        create_vault=args.create_vault,
    )
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Recursively mirror a local folder tree into the vault under
    ``base_remote_path``, creating missing remote folders and uploading
    files that do not yet exist on the remote."""
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        obj = Object.get_by_full_path(upload_root_path,
                                      assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=local_start
        )
        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # FIX: escape the local base path before using it in a regex; the
    # original built '^' + path unescaped, so regex metacharacters in a
    # directory name corrupted the pattern. Compiled once instead of
    # rebuilt for every entry.
    strip_base = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # Path of the current directory relative to the upload root.
        rel_root = strip_base.sub('', root).lstrip('/')

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(base_remote_path, rel_root, d)
            try:
                # NOTE(review): sibling implementations pass
                # assert_type='folder' here; confirm object_type is the
                # intended keyword for this lookup.
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )
                print('Notice: Folder created for {0} at {1}'
                      .format(os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(base_remote_path, rel_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(file_full_path)
                parent = Object.get_by_full_path(
                    parent_full_path, assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)
def _get_parent_folder(self, path):
    """Look up ``path`` beneath this vault, asserting it is a folder."""
    from solvebio import Object

    target = ':'.join([self.full_path, path])
    return Object.get_by_full_path(target,
                                   assert_type='folder',
                                   client=self._client)
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file to ``remote_path`` inside a vault.

    Creates the Object record, then PUTs the file bytes to the returned
    upload URL. Empty files are skipped.

    :param local_path: path of the local file (``~`` is expanded)
    :param remote_path: destination folder path within the vault
    :param vault_full_path: full path identifying the target vault
    :returns: the created Object, or None for empty files
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    md5, _ = md5sum(local_path, multipart_threshold=None)
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Lookup parent object
    if remote_path == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(
            ':'.join([vault.full_path, remote_path]),
            assert_type='folder', client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get('description',
                             'File uploaded via python client')

    # Create the file, and upload it to the Upload URL
    obj = Object.create(vault_id=vault.id,
                        parent_object_id=parent_object_id,
                        object_type='file',
                        filename=os.path.basename(local_path),
                        md5=md5,
                        mimetype=mimetype,
                        size=size,
                        description=description,
                        client=_client)
    print('Notice: File created for {0} at {1}'.format(
        local_path, obj.path))
    print('Notice: Upload initialized')
    upload_url = obj.upload_url
    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    # FIX: open the file with a context manager so the handle is closed
    # even if the PUT raises; the original passed open() directly to
    # requests.put and leaked the file object.
    with open(local_path, 'rb') as fileobj:
        upload_resp = requests.put(upload_url, data=fileobj,
                                   headers=headers)

    if upload_resp.status_code != 200:
        print('Notice: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code))
        print('See error message below:')
        print(upload_resp.content)
        # Clean up the failed upload
        obj.delete(force=True)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(
            local_path, obj.path))

    return obj
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file to ``remote_path`` inside a vault.

    Creates the Object record, then PUTs the file bytes to the upload
    URL through a session that retries connection errors. Empty files
    are skipped.

    :param local_path: path of the local file (``~`` is expanded)
    :param remote_path: destination folder path within the vault
    :param vault_full_path: full path identifying the target vault
    :returns: the created Object, or None for empty files
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    md5, _ = md5sum(local_path, multipart_threshold=None)
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Lookup parent object
    if remote_path == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(
            ':'.join([vault.full_path, remote_path]),
            assert_type='folder',
            client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get(
        'description',
        'File uploaded via python client'
    )

    # Create the file, and upload it to the Upload URL
    obj = Object.create(
        vault_id=vault.id,
        parent_object_id=parent_object_id,
        object_type='file',
        filename=os.path.basename(local_path),
        md5=md5,
        mimetype=mimetype,
        size=size,
        description=description,
        client=_client
    )
    print('Notice: File created for {0} at {1}'.format(local_path,
                                                       obj.path))
    print('Notice: Upload initialized')
    upload_url = obj.upload_url
    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    # Use a session with a retry policy to handle connection errors.
    session = requests.Session()
    session.mount('https://',
                  requests.adapters.HTTPAdapter(max_retries=5))

    # FIX: context-manage the file handle so it is always closed; the
    # original passed open() directly to session.put and leaked it.
    with open(local_path, 'rb') as fileobj:
        upload_resp = session.put(upload_url, data=fileobj,
                                  headers=headers)

    if upload_resp.status_code != 200:
        print('Notice: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code
        ))
        print('See error message below:')
        print(upload_resp.content)
        # Clean up the failed upload
        obj.delete(force=True)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(
            local_path, obj.path))

    return obj
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Recursively mirror a local folder tree into the vault under
    ``base_remote_path``, creating missing remote folders and uploading
    files that do not yet exist on the remote."""
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        obj = Object.get_by_full_path(upload_root_path,
                                      assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(vault_id=vault.id,
                                   parent_object_id=parent_object_id,
                                   object_type='folder',
                                   filename=local_start)
        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # FIX: escape the local base path before building the regex; the
    # original concatenated the raw path after '^', so metacharacters in
    # a directory name corrupted the pattern. Compiled once up front.
    strip_base = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # Path of the current directory relative to the upload root.
        rel_root = strip_base.sub('', root).lstrip('/')

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(base_remote_path, rel_root, d)
            try:
                # NOTE(review): sibling implementations pass
                # assert_type='folder' here; confirm object_type is the
                # intended keyword for this lookup.
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )
                print('Notice: Folder created for {0} at {1}'.format(
                    os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(base_remote_path, rel_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(file_full_path)
                parent = Object.get_by_full_path(
                    parent_full_path, assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)