예제 #1
0
def _create_folder(vault, full_path, tags=None):
    """Create a folder if not exists"""
    full_path, path_dict = Object.validate_full_path(full_path)
    folder_name = path_dict['filename']

    try:
        new_obj = Object.get_by_full_path(full_path)
    except NotFoundError:
        # Resolve the parent: None means the vault root.
        if path_dict['parent_path'] == '/':
            parent_object_id = None
        else:
            parent = Object.get_by_full_path(path_dict['parent_full_path'],
                                             assert_type='folder')
            parent_object_id = parent.id

        # Make the API call
        new_obj = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=folder_name,
            tags=tags or []
        )

        print('Notice: Folder created for {0} at {1}'
              .format(folder_name, new_obj.path))
    else:
        # Something already lives at this path; only a folder is acceptable.
        if not new_obj.is_folder:
            raise SolveError('Object type {} already exists at location: {}'
                             .format(new_obj.object_type, full_path))

    return new_obj
예제 #2
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.
    """
    base_remote_path, path_dict = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # Outside the vault root the destination must already be a folder.
    if path_dict['path'] != '/':
        Object.get_by_full_path(base_remote_path, assert_type='folder')

    for raw_path in args.local_path:
        # Drop any trailing slash so basename() behaves predictably.
        stripped = raw_path.rstrip('/')
        basename = os.path.basename(stripped)

        if os.path.isdir(stripped):
            _upload_folder(path_dict['domain'], vault,
                           base_remote_path, stripped, basename)
        else:
            Object.upload_file(stripped, path_dict['path'], vault.full_path)
예제 #3
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.
    """
    # Resolve vault/path overrides into a canonical remote path.
    base_remote_path, path_dict = Object.validate_full_path(args.full_path,
                                                            vault=args.vault,
                                                            path=args.path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        Object.get_by_full_path(base_remote_path, assert_type='folder')

    for local_path in args.local_path:
        # Normalize: drop trailing slash so basename() is stable.
        local_path = local_path.rstrip('/')
        local_start = os.path.basename(local_path)

        if os.path.isdir(local_path):
            # Directories are uploaded recursively by the folder helper.
            _upload_folder(path_dict['domain'], vault, base_remote_path,
                           local_path, local_start)
        else:
            Object.upload_file(local_path, path_dict['path'], vault.full_path)
예제 #4
0
    def get_by_full_path(cls, full_path, **kwargs):
        """Retrieve the Dataset located at ``full_path``."""
        from solvebio import Object

        # Explicit client kwarg wins, then the class client,
        # then the module-level default.
        _client = kwargs.pop('client', None) or cls._client or client
        dataset_obj = Object.get_by_full_path(
            full_path, assert_type='dataset', client=_client)
        return Dataset.retrieve(dataset_obj['dataset_id'],
                                client=_client, **kwargs)
예제 #5
0
 def _get_parent_folder(self, path):
     """Return the folder object at ``path`` inside this vault."""
     from solvebio import Object
     folder_full_path = ':'.join([self.full_path, path])
     return Object.get_by_full_path(folder_full_path,
                                    assert_type='folder',
                                    client=self._client)
예제 #6
0
def _download(full_path, local_folder_path, dry_run=False):
    """
    Given a folder or file, download all the files contained
    within it (not recursive).

    Args:
        full_path: remote path or glob identifying the file(s) to fetch.
        local_folder_path: local destination folder (created if missing).
        dry_run: when True, no folder is created and no files are fetched.
    """
    if dry_run:
        print('Running in dry run mode. Not downloading any files.')

    local_folder_path = os.path.expanduser(local_folder_path)
    if not os.path.exists(local_folder_path):
        print("Creating local download folder {}".format(local_folder_path))
        if not dry_run:
            # Fix: the original re-checked os.path.exists() here, which was
            # redundant — the enclosing branch already established it.
            os.makedirs(local_folder_path)

    # API will determine depth based on number of "/" in the glob
    # Add */** to match in any vault (recursive)
    files = Object.all(glob=full_path, limit=1000, object_type='file')
    if not files:
        print("No file(s) found at --full-path {}\nIf attempting to download "
              "multiple files, try using a glob 'vault:/path/folder/*'"
              .format(full_path))

    for file_ in files:

        if not dry_run:
            file_.download(local_folder_path)

        # NOTE(review): this prints "Downloaded" even in dry-run mode —
        # presumably intentional to show what would be fetched; confirm.
        print('Downloaded: {} to {}/{}'.format(
            file_.full_path, local_folder_path, file_.filename))
예제 #7
0
    def get_by_full_path(cls, full_path, **kwargs):
        """Retrieve the Dataset located at ``full_path``.

        Asserts the object at the path is a dataset, then retrieves the
        underlying Dataset record by its ``dataset_id``.
        """
        from solvebio import Object

        # Explicit client kwarg wins, then the class-level client,
        # then the module-level default client.
        _client = kwargs.pop('client', None) or cls._client or client
        obj = Object.get_by_full_path(full_path,
                                      assert_type='dataset',
                                      client=_client)
        return Dataset.retrieve(obj['dataset_id'], client=_client, **kwargs)
예제 #8
0
    def _object_list_helper(self, **params):
        """List objects in this vault, forwarding any extra filter params."""
        from solvebio import Object

        # Scope the listing to this vault.
        params['vault_id'] = self.id
        return Object.all(client=self._client, **params)
예제 #9
0
    def _object_list_helper(self, **params):
        """Return objects in this vault, applying any extra filter params."""
        from solvebio import Object

        # Restrict the listing to this vault.
        params.update({
            'vault_id': self.id,
        })

        items = Object.all(client=self._client, **params)
        return items
예제 #10
0
def tag(args):
    """Tags a list of paths with provided tags"""

    # Collect all objects matching each path glob (write access required).
    objects = []
    for full_path in args.full_path:
        # API will determine depth based on number of "/" in the glob
        # Add */** to match in any vault (recursive)
        objects.extend(list(Object.all(
            glob=full_path, permission='write', limit=1000)))

    seen_vaults = {}
    taggable_objects = []
    exclusions = args.exclude or []

    # Runs through all objects to get tagging candidates
    # taking exclusions and object_type filters into account.
    for object_ in objects:

        if should_exclude(object_.full_path, exclusions,
                          dry_run=args.dry_run):
            continue

        if should_tag_by_object_type(args, object_):
            taggable_objects.append(object_)
            # Track distinct vaults only for the confirmation message below.
            seen_vaults[object_.vault_id] = 1

    if not taggable_objects:
        print('No taggable objects found at provided locations.')
        return

    # If args.no_input, changes will be applied immediately.
    # Otherwise, prints the objects and if tags will be applied or not
    for object_ in taggable_objects:
        object_.tag(
            args.tag, remove=args.remove,
            dry_run=args.dry_run, apply_save=args.no_input)

    # Prompts for confirmation and then runs with apply_save=True
    if not args.no_input:

        print('')
        # NOTE(review): raw_input is the Python 2 name — presumably aliased
        # to input() elsewhere for Python 3; confirm.
        res = raw_input(
            'Are you sure you want to apply the above changes to '
            '{} object(s) in {} vault(s)? [y/N] '
            .format(len(taggable_objects), len(seen_vaults.keys()))
        )
        print('')
        if res.strip().lower() != 'y':
            print('Not applying changes.')
            return

        # Second pass actually persists the tag changes.
        for object_ in taggable_objects:
            object_.tag(
                args.tag, remove=args.remove,
                dry_run=args.dry_run, apply_save=True)
예제 #11
0
    def get_or_create_uploads_path(cls, **kwargs):
        """Return the path of the personal vault's 'Uploads' folder,
        creating the folder on demand when it does not exist."""
        from solvebio import Object
        _client = kwargs.pop('client', None) or cls._client or client
        vault = cls.get_personal_vault(client=_client)
        default_path = 'Uploads'
        full_path = '{0}:/{1}'.format(vault.full_path, default_path)

        try:
            upload_dir = Object.get_by_full_path(
                full_path, assert_type='folder', client=_client)
        except NotFoundError:
            print(
                "Uploads directory not found. Creating {0}".format(full_path))
            upload_dir = Object.create(
                vault_id=vault.id,
                object_type='folder',
                filename=default_path,
                client=_client)

        return upload_dir.path
예제 #12
0
    def get_or_create_uploads_path(cls, **kwargs):
        """Return the path of the 'Uploads' folder in the personal vault.

        The folder is created when it does not already exist.
        """
        from solvebio import Object
        # Explicit client kwarg wins, then class client, then module default.
        _client = kwargs.pop('client', None) or cls._client or client
        v = cls.get_personal_vault(client=_client)
        default_path = 'Uploads'
        full_path = '{0}:/{1}'.format(v.full_path, default_path)

        try:
            upload_dir = Object.get_by_full_path(
                full_path, assert_type='folder', client=_client)
        except NotFoundError:
            print("Uploads directory not found. Creating {0}"
                  .format(full_path))
            upload_dir = Object.create(
                vault_id=v.id,
                object_type='folder',
                filename=default_path,
                client=_client
            )

        return upload_dir.path
예제 #13
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset
        * template_id
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity
        * file (list)
        * follow (default: False)

    """
    # Resolve vault/path overrides into a canonical dataset full path.
    full_path, path_dict = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args)
    else:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as e:
            # Only a 404 means "missing"; re-raise everything else.
            if e.status_code != 404:
                raise e

            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Generate a manifest from the local files
    manifest = solvebio.Manifest()
    manifest.add(*args.file)

    # Create the manifest-based import
    imp = solvebio.DatasetImport.create(
        dataset_id=dataset.id,
        manifest=manifest.manifest,
        commit_mode=args.commit_mode
    )

    # Either stream the import progress or point at the activity page.
    if args.follow:
        imp.follow()
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))
예제 #14
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset
        * template_id
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity
        * file (list)
        * follow (default: False)

    """
    # Canonicalize the destination path, honoring vault/path overrides.
    full_path, path_dict = Object.validate_full_path(args.full_path,
                                                     vault=args.vault,
                                                     path=args.path)

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args)
    else:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as e:
            # Anything other than a 404 is unexpected — re-raise.
            if e.status_code != 404:
                raise e

            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Generate a manifest from the local files
    manifest = solvebio.Manifest()
    manifest.add(*args.file)

    # Create the manifest-based import
    imp = solvebio.DatasetImport.create(dataset_id=dataset.id,
                                        manifest=manifest.manifest,
                                        commit_mode=args.commit_mode)

    # Follow progress inline, or print a link to the activity page.
    if args.follow:
        imp.follow()
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}".format(
            mesh_url))
예제 #15
0
    def create_folder(self, filename, **params):
        """Create a folder named ``filename`` in this vault.

        An optional ``path`` param places the folder under an existing
        parent folder; otherwise it is created at the vault root.
        """
        from solvebio import Object

        path = params.pop('path', None)
        if path and path != '/':
            # Anchor the new folder under the resolved parent.
            params['parent_object_id'] = self._get_parent_folder(path).id

        params['filename'] = filename
        params['vault_id'] = self.id
        params['object_type'] = 'folder'
        return Object.create(client=self._client, **params)
예제 #16
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Get the dataset at ``full_path``, creating it if necessary.

        Forces both the "get" assertion and the "create" object type to
        ``dataset``, then retrieves the Dataset record.
        """
        from solvebio import Object

        # Assert this is a dataset (for the "get" in get_or_create)
        kwargs['assert_type'] = 'dataset'
        # Create this is a dataset (for the "create" in get_or_create)
        kwargs['object_type'] = 'dataset'

        _client = kwargs.pop('client', None) or cls._client or client
        obj = Object.get_or_create_by_full_path(full_path,
                                                client=_client,
                                                **kwargs)

        # Fix: propagate the resolved client to retrieve() — previously the
        # module-default client was used, inconsistent with
        # get_by_full_path(), which passes client=_client.
        return cls.retrieve(obj.dataset_id, client=_client)
예제 #17
0
    def create_folder(self, filename, **params):
        """Create a folder named ``filename`` in this vault.

        An optional ``path`` param places the folder under an existing
        parent folder; otherwise it is created at the vault root.
        """
        from solvebio import Object

        path = params.pop('path', None)
        if path and path != '/':
            # Resolve the parent so the new folder nests under it.
            parent_object = self._get_parent_folder(path)
            params['parent_object_id'] = parent_object.id

        params.update({
            'filename': filename,
            'vault_id': self.id,
            'object_type': 'folder'
        })
        return Object.create(client=self._client, **params)
예제 #18
0
    def create_dataset(self, name, **params):
        """Create a dataset named ``name`` in this vault.

        An optional ``path`` param places the dataset under an existing
        folder; ``'/'`` or ``None`` creates it at the vault root.
        """
        from solvebio import Object

        params['vault_id'] = self.id
        params['object_type'] = 'dataset'
        path = params.pop('path', None)

        if path == '/' or path is None:
            # Vault root: no parent object.
            params['parent_object_id'] = None
        else:
            parent_object = self._get_parent_folder(path)
            params['parent_object_id'] = parent_object.id

        params['filename'] = name
        # Fix: pass this vault's client, consistent with create_folder();
        # previously the module-default client was always used.
        return Object.create(client=self._client, **params)
예제 #19
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload the file at ``local_path`` to ``remote_path`` in a vault.

        Creates the remote file object (with md5/mimetype/size metadata),
        PUTs the bytes to the returned upload URL, and deletes the object
        again if the upload fails. Returns the created object, or None for
        empty files (which cannot be uploaded).
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        # Empty files cannot be uploaded; bail out early.
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Lookup parent object ('/' means the vault root, i.e. no parent)
        if remote_path == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(
                ':'.join([vault.full_path, remote_path]),
                assert_type='folder', client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get(
            'description',
            'File uploaded via python client'
        )

        # Create the file, and upload it to the Upload URL
        obj = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='file',
            filename=os.path.basename(local_path),
            md5=md5,
            mimetype=mimetype,
            size=size,
            description=description,
            client=_client
        )

        print('Notice: File created for {0} at {1}'.format(local_path,
                                                           obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Use a session with a retry policy to handle connection errors.
        session = requests.Session()
        session.mount('https://', requests.adapters.HTTPAdapter(max_retries=5))
        # Fix: open the file in a context manager so the handle is always
        # closed — previously the file object passed to put() was leaked.
        with open(local_path, 'rb') as fileobj:
            upload_resp = session.put(upload_url,
                                      data=fileobj,
                                      headers=headers)

        if upload_resp.status_code != 200:
            print('Notice: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code
            ))
            print('See error message below:')
            print(upload_resp.content)
            # Clean up the failed upload
            obj.delete(force=True)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(local_path,
                                                                    obj.path))

        return obj
예제 #20
0
 def _get_parent_folder(self, path):
     """Resolve ``path`` inside this vault and return the folder object."""
     from solvebio import Object
     target = ':'.join([self.full_path, path])
     return Object.get_by_full_path(
         target, assert_type='folder', client=self._client)
예제 #21
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset and it's args
            * capacity
            * template_id
            * template_file
            * capacity
            * tag
            * metadata
            * metadata_json_file
            * create_vault
        * full_path
        * commit_mode
        * remote_source
        * dry_run
        * follow
        * file (list)

    """
    if args.dry_run:
        print("NOTE: Running import command in dry run mode")

    # Canonicalize the destination dataset path.
    full_path, path_dict = Object.validate_full_path(args.full_path)

    files_list = []
    if args.remote_source:
        # Validate files
        for file_fp in args.file:
            # Resolve each remote glob to concrete objects.
            files_ = list(Object.all(glob=file_fp, limit=1000))
            if not files_:
                print("Did not find any {}files at path {}".format(
                    'remote ' if args.remote_source else '', file_fp))
            else:
                for file_ in files_:
                    print("Found file: {}".format(file_.full_path))
                    files_list.append(file_)

    else:
        # Local files
        # Note: if these are globs or folders, then this will
        # create a multi-file manifest which is deprecated
        # and should be updated to one file per import.
        files_list = [fp for fp in args.file]

    if not files_list:
        print("Exiting. No files were found at the following {}paths: {}"
              .format('remote ' if args.remote_source else '',
                      ', '.join(args.file)))
        sys.exit(1)

    # Resolve the dataset template: by ID, from a file, or none at all.
    if args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            # Only a 404 means "missing"; re-raise everything else.
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, args.dry_run)
    else:
        template = None

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args, template=template)
    else:
        try:
            dataset = Object.get_by_full_path(full_path, assert_type='dataset')
        except solvebio.errors.NotFoundError:
            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Dry run stops here: report what would be imported and exit.
    if args.dry_run:
        print("Importing the following files/paths into dataset: {}"
              .format(full_path))
        for file_ in files_list:
            if args.remote_source:
                print(file_.full_path)
            else:
                print(file_)
        return

    # Generate a manifest from the local files
    imports = []
    for file_ in files_list:
        # Remote files import by object ID; local files via a manifest.
        if args.remote_source:
            kwargs = dict(object_id=file_.id)
        else:
            manifest = solvebio.Manifest()
            manifest.add(file_)
            kwargs = dict(manifest=manifest.manifest)

        # Add template params
        if template:
            kwargs.update(template.import_params)

        # Create the import
        import_ = DatasetImport.create(
            dataset_id=dataset.id,
            commit_mode=args.commit_mode,
            **kwargs
        )

        imports.append(import_)

    # Follow dataset activity inline, or link to the activity page.
    if args.follow:
        dataset.activity(follow=True)
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))

    return imports, dataset
예제 #22
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.

    If the remote destination folder is missing, it is created when
    --create-full-path is set; otherwise the lookup error propagates.
    """

    base_remote_path, path_dict = Object.validate_full_path(args.full_path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        try:
            Object.get_by_full_path(base_remote_path, assert_type='folder')
        except NotFoundError:
            # Fix: previously a bare "except:" swallowed every error here
            # (including type-assertion failures and interrupts); only a
            # missing path should trigger the create-full-path fallback.
            if not args.create_full_path:
                raise

            if args.dry_run:
                print('[Dry Run] Creating {}'.format(base_remote_path))
            else:
                # Create the destination path (including subfolders)
                # if not found
                parent_folder_path = vault.full_path + ':'
                folders = path_dict['path'].lstrip('/').split('/')
                for folder in folders:
                    folder_full_path = os.path.join(parent_folder_path, folder)
                    parent_folder = _create_folder(vault, folder_full_path)
                    parent_folder_path = parent_folder.full_path

    # Exit if there are multiple local paths and the
    # exclude paths are not absolute
    base_exclude_paths = args.exclude or []
    if base_exclude_paths and len(args.local_path) > 1:
        rel_exclude_paths = [p for p in base_exclude_paths
                             if not os.path.isabs(p)]
        local_path_parents = set([os.path.dirname(os.path.abspath(p))
                                  for p in args.local_path])
        if rel_exclude_paths and len(local_path_parents) > 1:
            sys.exit('Exiting. Cannot apply the --exclude relative paths when '
                     'multiple upload paths with different parent directories '
                     'are specified. Make --exclude paths absolute or run '
                     'upload paths one at a time.')

    for local_path in args.local_path:

        # Expand local path and strip trailing slash
        local_path = os.path.abspath(local_path).rstrip('/')
        local_name = os.path.basename(local_path)

        # add basepath to excludes
        exclude_paths = [
            os.path.join(local_path, os.path.normpath(exclude_path))
            for exclude_path in base_exclude_paths
        ]

        if os.path.isdir(local_path):
            _upload_folder(path_dict['domain'], vault,
                           base_remote_path, local_path,
                           local_name, exclude_paths=exclude_paths,
                           dry_run=args.dry_run)
        else:
            if args.dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_path, path_dict['path']))
            else:
                Object.upload_file(local_path, path_dict['path'],
                                   vault.full_path)
예제 #23
0
def create_dataset(args, template=None):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * tag
        * metadata
        * metadata_json_file
        * create_vault
        * full_path
        * dry_run

    An already-validated ``template`` may be passed in by the caller
    (e.g. the import command) to skip template resolution here.
    """
    if args.dry_run:
        print("NOTE: Running create-dataset command in dry run mode")

    full_path, path_dict = Object.validate_full_path(args.full_path)

    try:
        # Fail if a dataset already exists.
        Object.get_by_full_path(full_path, assert_type='dataset')
        print('A dataset already exists at path: {0}'.format(full_path))
        sys.exit(1)
    except NotFoundError:
        pass

    # Accept a template_id or a template_file
    if template:
        # Template has already been validated/created
        # in the import command that called this
        pass
    elif args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            # Only a 404 means "missing"; re-raise everything else.
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, args.dry_run)
    else:
        template = None

    if template:
        print("Creating new dataset {0} using the template '{1}'."
              .format(full_path, template.name))
        fields = template.fields
        description = 'Created with dataset template: {0}' \
            .format(str(template.id))
    else:
        fields = []
        description = None

    # Create dataset metadata
    # Looks at --metadata_json_file first and will update
    # that with any other key/value pairs passed in to --metadata
    metadata = {}
    if args.metadata and args.metadata_json_file:
        print('WARNING: Received --metadata and --metadata-json-file. '
              'Will update the JSON file values with the --metadata values')

    if args.metadata_json_file:
        with open(args.metadata_json_file, 'r') as fp:
            try:
                metadata = json.load(fp)
            # Fix: the original bare "except:" also swallowed interrupts and
            # unrelated errors; json.load signals bad input via ValueError
            # (JSONDecodeError is its subclass).
            except ValueError:
                print('Metadata JSON file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.metadata_json_file))
                sys.exit(1)

    if args.metadata:
        metadata.update(args.metadata)

    # Dry run stops here: report what would be created and exit.
    if args.dry_run:
        print("Creating new '{}' capacity dataset at {}"
              .format(args.capacity, full_path))
        if description:
            print("Description: {}".format(description))
        if fields:
            print("Fields: {}".format(fields))
        if args.tag:
            print("Tags: {}".format(args.tag))
        if metadata:
            print("Metadata: {}".format(metadata))
        return

    return Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        fields=fields,
        description=description,
        tags=args.tag or [],
        metadata=metadata,
        create_vault=args.create_vault,
    )
예제 #24
0
 def upload_file(self, local_path, remote_path, **kwargs):
     """Upload ``local_path`` into this vault at ``remote_path``."""
     from solvebio import Object
     return Object.upload_file(
         local_path, remote_path, self.full_path, **kwargs)
예제 #25
0
def create_dataset(args):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * create_vault
        * [argument] dataset name or full path

    NOTE: genome_build has been deprecated and is no longer used.

    """
    # For backwards compatibility, the "full_path" argument
    # can be a dataset filename, but only if vault and path
    # are set. If vault/path are both provided and there
    # are no forward-slashes in the "full_path", assume
    # the user has provided a dataset filename.
    if '/' not in args.full_path and args.vault and args.path:
        full_path, path_dict = Object.validate_full_path(
            '{0}:/{1}/{2}'.format(args.vault, args.path, args.full_path))
    else:
        full_path, path_dict = Object.validate_full_path(
            args.full_path, vault=args.vault, path=args.path)

    # Accept a template_id or a template_file
    if args.template_id:
        # Validate the template ID
        try:
            tpl = solvebio.DatasetTemplate.retrieve(args.template_id)
        except solvebio.SolveError as e:
            # Only a 404 means "missing"; re-raise everything else.
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!"
                  .format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        mode = 'r'
        fopen = open
        if check_gzip_path(args.template_file):
            mode = 'rb'
            fopen = gzip.open

        # Validate the template file
        with fopen(args.template_file, mode) as fp:
            try:
                tpl_json = json.load(fp)
            # Fix: the original bare "except:" also swallowed interrupts and
            # unrelated errors; json.load signals bad input via ValueError
            # (JSONDecodeError is its subclass).
            except ValueError:
                print('Template file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.template_file))
                sys.exit(1)

        tpl = solvebio.DatasetTemplate.create(**tpl_json)
        print("A new dataset template was created with id: {0}".format(tpl.id))
    else:
        print("Creating a new dataset {0} without a template."
              .format(full_path))
        tpl = None
        fields = []
        entity_type = None
        description = None

    if tpl:
        print("Creating new dataset {0} using the template '{1}'."
              .format(full_path, tpl.name))
        fields = tpl.fields
        entity_type = tpl.entity_type
        # include template used to create
        description = 'Created with dataset template: {0}'.format(str(tpl.id))

    return solvebio.Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        entity_type=entity_type,
        fields=fields,
        description=description,
        create_vault=args.create_vault,
    )
예제 #26
0
def _upload_folder(domain, vault, base_remote_path,
                   base_local_path, local_start):

    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        obj = Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=local_start
        )

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    for root, dirs, files in os.walk(base_local_path):

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '', root).lstrip('/'),  # noqa
                d
            )

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'
                      .format(os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path),
                       '',
                       root).lstrip('/'),
                f,
            )
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(
                    os.path.join(
                        base_remote_path,
                        re.sub('^' + os.path.dirname(base_local_path),
                               '',
                               root).lstrip('/'),
                        f,
                    )
                )
                parent = Object.get_by_full_path(
                    parent_full_path, assert_type='folder')
                Object.upload_file(os.path.join(root, f),
                                   parent.path, vault.full_path)
예제 #27
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload a local file to a remote vault path.

        Creates a file Object under ``remote_path`` in the vault at
        ``vault_full_path`` and PUTs the file contents to the object's
        upload URL.

        :param local_path: path to the local file (``~`` is expanded)
        :param remote_path: remote folder path ('/' means the vault root)
        :param vault_full_path: full path of the target vault
        :param kwargs: may contain ``client`` and ``description``
        :returns: the created Object, or None for empty files. On a
            failed upload the remote object is deleted but the (now
            deleted) Object is still returned, matching prior behavior.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        # Zero-byte files cannot be uploaded; skip them
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Lookup parent object
        if remote_path == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(':'.join(
                [vault.full_path, remote_path]),
                                                 assert_type='folder',
                                                 client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get('description',
                                 'File uploaded via python client')

        # Create the file, and upload it to the Upload URL
        obj = Object.create(vault_id=vault.id,
                            parent_object_id=parent_object_id,
                            object_type='file',
                            filename=os.path.basename(local_path),
                            md5=md5,
                            mimetype=mimetype,
                            size=size,
                            description=description,
                            client=_client)

        print('Notice: File created for {0} at {1}'.format(
            local_path, obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        # The upload endpoint validates the body against these headers
        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # FIX: open the file in a context manager so the handle is
        # always closed (it was previously left open after the PUT).
        with open(local_path, 'rb') as fileobj:
            upload_resp = requests.put(upload_url,
                                       data=fileobj,
                                       headers=headers)

        if upload_resp.status_code != 200:
            print('Notice: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code))
            print('See error message below:')
            print(upload_resp.content)
            # Clean up the failed upload
            obj.delete(force=True)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(
                local_path, obj.path))

        return obj
예제 #28
0
 def upload_file(self, local_path, remote_path, **kwargs):
     """Upload a local file into this vault (delegates to Object)."""
     from solvebio import Object

     return Object.upload_file(local_path, remote_path,
                               self.full_path, **kwargs)
예제 #29
0
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Mirror a local directory tree onto the remote vault.

    Ensures the upload root folder exists under ``base_remote_path``,
    then walks ``base_local_path``, creating missing remote sub-folders
    and uploading files that are not yet present on the remote.

    :param domain: account domain (not referenced in this body)
    :param vault: target Vault object (provides ``id`` and ``full_path``)
    :param base_remote_path: remote full path to upload under
    :param base_local_path: local directory to walk
    :param local_start: basename of the local directory; becomes the
        remote root folder's name
    """

    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        obj = Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        # Root folder missing: resolve its parent (None at vault root)
        # and create it.
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(vault_id=vault.id,
                                   parent_object_id=parent_object_id,
                                   object_type='folder',
                                   filename=local_start)

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    for root, dirs, files in os.walk(base_local_path):

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            # Remote path = base + (root relative to the local parent) + d
            dirpath = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '',
                       root).lstrip('/'),  # noqa
                d)

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                # dirpath.split(':')[-1] strips the domain:vault prefix
                # to test whether the folder sits at the vault root.
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(parent_full_path,
                                                     assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'.format(
                    os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '',
                       root).lstrip('/'),
                f,
            )
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                # Not on the remote yet: upload into its parent folder
                parent_full_path = os.path.dirname(
                    os.path.join(
                        base_remote_path,
                        re.sub('^' + os.path.dirname(base_local_path), '',
                               root).lstrip('/'),
                        f,
                    ))
                parent = Object.get_by_full_path(parent_full_path,
                                                 assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)
예제 #30
0
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start, exclude_paths=None, dry_run=False):
    """Recursively mirror a local directory onto the remote vault.

    Ensures the upload root folder exists, then walks the local tree,
    creating remote folders and uploading non-excluded files. In
    dry-run mode only the would-be actions are printed.
    """
    # Ensure the upload root folder exists on the remote
    try:
        root_full_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        Object.get_by_full_path(root_full_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, _ = \
            Object.validate_full_path(base_remote_path)
        root_folder_path = os.path.join(base_remote_path, local_start)
        if dry_run:
            print('[Dry Run] Creating folder {}'.format(root_folder_path))
        else:
            _create_folder(vault, root_folder_path)

    # The local prefix stripped from each walked directory
    local_prefix = os.path.dirname(base_local_path)

    # Create folders and upload files
    for parent_dir, subdirs, filenames in os.walk(base_local_path):

        # Directory path relative to the local parent directory
        rel_parent = re.sub('^' + local_prefix, '', parent_dir).lstrip('/')

        if should_exclude(parent_dir, exclude_paths, dry_run=dry_run):
            continue

        remote_parent_path = os.path.join(base_remote_path, rel_parent)

        # Folders first
        for name in subdirs:
            local_folder = os.path.join(parent_dir, name)
            if should_exclude(local_folder, exclude_paths, dry_run=dry_run):
                continue

            target = os.path.join(remote_parent_path, name)
            if dry_run:
                print('[Dry Run] Creating folder {}'.format(target))
            else:
                _create_folder(vault, target)

        # Then the files that are not excluded
        for name in filenames:
            local_file = os.path.join(parent_dir, name)
            if should_exclude(local_file, exclude_paths, dry_run=dry_run):
                continue

            if dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_file, remote_parent_path))
            else:
                folder_obj = Object.get_by_full_path(
                    remote_parent_path, assert_type='folder')
                Object.upload_file(local_file, folder_obj.path,
                                   vault.full_path)
예제 #31
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload a local file to a remote vault path.

        Skips empty files, and skips the upload entirely when a remote
        file with the same path and md5sum already exists. Transient
        connection errors during the upload PUT are retried.

        :param local_path: path to the local file (``~`` is expanded)
        :param remote_path: remote folder path within the vault
        :param vault_full_path: full path of the target vault
        :param kwargs: may contain ``client``, ``description``, ``tags``
        :returns: the created Object (or the pre-existing one on an
            md5 match), or None for empty files
        :raises FileUploadError: if the upload PUT does not return 200
            (the partially-created remote object is deleted first)
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        # Zero-byte files cannot be uploaded; skip them
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        local_md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Check if object exists already and compare md5sums
        full_path, path_dict = Object.validate_full_path(os.path.join(
            '{}:{}'.format(vault.full_path, remote_path),
            os.path.basename(local_path)),
                                                         client=_client)
        try:
            obj = cls.get_by_full_path(full_path, client=_client)
            if not obj.is_file:
                print('WARNING: A {} currently exists at {}'.format(
                    obj.object_type, full_path))
            else:
                # Check against md5sum of remote file
                if obj.md5 == local_md5:
                    print('WARNING: File {} (md5sum {}) already exists, '
                          'not uploading'.format(full_path, local_md5))
                    return obj
                else:
                    print('WARNING: File {} exists on SolveBio with different '
                          'md5sum (local: {} vs remote: {}) Uploading anyway, '
                          'but not overwriting.'.format(
                              full_path, local_md5, obj.md5))
        except NotFoundError:
            pass

        # Lookup parent object
        if path_dict['parent_path'] == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(path_dict['parent_full_path'],
                                                 assert_type='folder',
                                                 client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get('description')

        # Create the file, and upload it to the Upload URL
        obj = Object.create(vault_id=vault.id,
                            parent_object_id=parent_object_id,
                            object_type='file',
                            filename=os.path.basename(local_path),
                            md5=local_md5,
                            mimetype=mimetype,
                            size=size,
                            description=description,
                            tags=kwargs.get('tags', []) or [],
                            client=_client)

        print('Notice: File created for {0} at {1}'.format(
            local_path, obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        # The upload endpoint validates the body against these headers
        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(local_md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Use a session with a retry policy to handle connection errors.
        session = requests.Session()
        max_retries = 5
        retry = Retry(
            total=max_retries,
            read=max_retries,
            connect=max_retries,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504, 400),
        )
        session.mount('https://',
                      requests.adapters.HTTPAdapter(max_retries=retry))
        # FIX: open the file in a context manager so the handle is
        # always closed (it was previously left open after the PUT).
        with open(local_path, 'rb') as fileobj:
            upload_resp = session.put(upload_url,
                                      data=fileobj,
                                      headers=headers)

        if upload_resp.status_code != 200:
            print('WARNING: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code))
            # Clean up the failed upload
            obj.delete(force=True)
            raise FileUploadError(upload_resp.content)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(
                local_path, obj.path))

        return obj
예제 #32
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Get an Object by full path, creating it and any missing
        parent folders (and optionally the vault) if it does not exist.

        :param full_path: full path of the object (domain:vault:/path)
        :param kwargs: ``object_type`` is required when creating; also
            supports ``create_vault`` (default False), ``create_folders``
            (default True), ``assert_type`` and ``client``
        :returns: the existing or newly created Object
        :raises Exception: if the vault or a parent folder is missing
            and auto-creation was not requested, or if ``object_type``
            is missing when creating a new Object
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        # Check for object type assertion, if not explicitly added, see
        # if user has passed object_type, as their intent was to get/create
        # an object of that type.
        assert_type = kwargs.pop('assert_type',
                                 kwargs.get('object_type', None))

        try:
            return cls.get_by_full_path(full_path,
                                        assert_type=assert_type,
                                        client=_client)
        except NotFoundError:
            pass

        # Object type required when creating Object
        object_type = kwargs.get('object_type')
        if not object_type:
            raise Exception("'object_type' is required when creating a new "
                            "Object. Pass one of: file, folder, dataset")

        # TODO should we require file contents?
        # Technically a user could then use this object to the call
        # upload_file()
        # if object_type == 'file' and not kwargs.get('content'):
        #     raise Exception('')

        # Object not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path('{0}:{1}'.format(
                parts['domain'], parts['vault']),
                                                     client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception('Vault with name {0}:{1} does not exist. Pass '
                                'create_vault=True to auto-create'.format(
                                    parts['domain'], parts['vault']))
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the object if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        # Maps each known folder path to its object ID ('/' has none)
        id_map = {'/': None}

        # Walk upward from the object's parent until an existing folder
        # (or the vault root) is found, recording the missing folders.
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    # FIX: the '{}' placeholder was previously never
                    # filled in (no .format call on the message).
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Create the missing folders from the top down
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            # The last folder created is the object's direct parent
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Object.create(filename=os.path.basename(object_path),
                             vault_id=vault.id,
                             parent_object_id=parent_folder_id,
                             client=_client,
                             **kwargs)
예제 #33
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Get a Dataset by full path, creating it and any missing
        parent folders (and optionally the vault) if it does not exist.

        :param full_path: full path of the dataset (domain:vault:/path)
        :param kwargs: supports ``create_vault`` (default False),
            ``create_folders`` (default True) and ``client``; remaining
            kwargs are passed to ``Dataset.create``
        :returns: the existing or newly created Dataset
        :raises Exception: if the vault does not exist, or a parent
            folder is missing and ``create_folders`` is False
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path,
                                            assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path('{0}:{1}'.format(
                parts['domain'], parts['vault']),
                                                     client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault']))
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        # Maps each known folder path to its object ID ('/' has none)
        id_map = {'/': None}

        # Walk upward from the dataset's parent until an existing folder
        # (or the vault root) is found, recording the missing folders.
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    # FIX: the '{}' placeholder was previously never
                    # filled in (no .format call on the message).
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Create the missing folders from the top down
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            # The last folder created is the dataset's direct parent
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
예제 #34
0
 def vault_object(self):
     """Fetch the vault Object referenced by this resource."""
     from solvebio import Object

     object_id = self['vault_object_id']
     return Object.retrieve(object_id, client=self._client)
예제 #35
0
 def vault_object(self):
     """Retrieve the associated vault Object by its stored ID."""
     from solvebio import Object

     return Object.retrieve(
         self['vault_object_id'], client=self._client)
예제 #36
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Get a Dataset by full path, creating it and any missing
        parent folders (and optionally the vault) if it does not exist.

        :param full_path: full path of the dataset (domain:vault:/path)
        :param kwargs: supports ``create_vault`` (default False),
            ``create_folders`` (default True) and ``client``; remaining
            kwargs are passed to ``Dataset.create``
        :returns: the existing or newly created Dataset
        :raises Exception: if the vault does not exist, or a parent
            folder is missing and ``create_folders`` is False
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path, assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path(
                '{0}:{1}'.format(parts['domain'], parts['vault']),
                client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault'])
                )
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        # Maps each known folder path to its object ID ('/' has none)
        id_map = {'/': None}

        # Walk upward from the dataset's parent until an existing folder
        # (or the vault root) is found, recording the missing folders.
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    # FIX: the '{}' placeholder was previously never
                    # filled in (no .format call on the message).
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Create the missing folders from the top down
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client
            )
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            # The last folder created is the dataset's direct parent
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
예제 #37
0
def create_dataset(args):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * create_vault
        * [argument] dataset name or full path

    NOTE: genome_build has been deprecated and is no longer used.

    """
    # For backwards compatibility, the "full_path" argument
    # can be a dataset filename, but only if vault and path
    # are set. If vault/path are both provided and there
    # are no forward-slashes in the "full_path", assume
    # the user has provided a dataset filename.
    if '/' not in args.full_path and args.vault and args.path:
        full_path, path_dict = Object.validate_full_path('{0}:/{1}/{2}'.format(
            args.vault, args.path, args.full_path))
    else:
        full_path, path_dict = Object.validate_full_path(args.full_path,
                                                         vault=args.vault,
                                                         path=args.path)

    # Accept a template_id or a template_file
    if args.template_id:
        # Validate the template ID
        try:
            tpl = solvebio.DatasetTemplate.retrieve(args.template_id)
        except solvebio.SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        mode = 'r'
        fopen = open
        if check_gzip_path(args.template_file):
            mode = 'rb'
            fopen = gzip.open

        # Validate the template file
        with fopen(args.template_file, mode) as fp:
            try:
                tpl_json = json.load(fp)
            # FIX: was a bare "except:", which also swallowed
            # KeyboardInterrupt/SystemExit. json.JSONDecodeError is a
            # subclass of ValueError, so this catches invalid JSON only.
            except ValueError:
                print('Template file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.template_file))
                sys.exit(1)

        tpl = solvebio.DatasetTemplate.create(**tpl_json)
        print("A new dataset template was created with id: {0}".format(tpl.id))
    else:
        print(
            "Creating a new dataset {0} without a template.".format(full_path))
        tpl = None
        fields = []
        entity_type = None
        description = None

    if tpl:
        print("Creating new dataset {0} using the template '{1}'.".format(
            full_path, tpl.name))
        fields = tpl.fields
        entity_type = tpl.entity_type
        # include template used to create
        description = 'Created with dataset template: {0}'.format(str(tpl.id))

    return solvebio.Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        entity_type=entity_type,
        fields=fields,
        description=description,
        create_vault=args.create_vault,
    )