Example #1
0
def _create_folder(vault, full_path, tags=None):
    """Return the folder at ``full_path``, creating it when it is missing.

    ``full_path`` is validated first. If an object already exists there it
    is returned as long as it is a folder; otherwise a new folder is created
    in ``vault`` under the parent folder named by the validated path.

    Raises:
        SolveError: If a non-folder object already exists at ``full_path``.
    """
    full_path, parts = Object.validate_full_path(full_path)
    name = parts['filename']

    try:
        folder = Object.get_by_full_path(full_path)
        if not folder.is_folder:
            message = 'Object type {} already exists at location: {}'.format(
                folder.object_type, full_path)
            raise SolveError(message)
    except NotFoundError:
        # Nothing exists at this path yet. Folders directly under the vault
        # root have no parent object.
        parent_id = None
        if parts['parent_path'] != '/':
            parent_id = Object.get_by_full_path(
                parts['parent_full_path'], assert_type='folder').id

        folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_id,
            object_type='folder',
            filename=name,
            tags=tags if tags else [],
        )

        notice = 'Notice: Folder created for {0} at {1}'.format(
            name, folder.path)
        print(notice)

    return folder
Example #2
0
    def create_folder(self, filename, **params):
        """Create a folder named ``filename`` in this vault.

        An optional ``path`` keyword names the parent folder. When it is
        missing, empty, or ``'/'``, no ``parent_object_id`` is set by this
        method. All remaining keyword arguments are forwarded to
        ``Object.create``.
        """
        from solvebio import Object

        parent_path = params.pop('path', None)
        if parent_path and parent_path != '/':
            params['parent_object_id'] = \
                self._get_parent_folder(parent_path).id

        # These fields always override whatever the caller passed in.
        params['filename'] = filename
        params['vault_id'] = self.id
        params['object_type'] = 'folder'
        return Object.create(client=self._client, **params)
Example #3
0
    def create_folder(self, filename, **params):
        """Create a new folder object in this vault and return it.

        ``params`` may carry a ``path`` entry naming the parent folder; it is
        removed before the remaining parameters are passed on to
        ``Object.create``. A falsy ``path`` or ``'/'`` leaves
        ``parent_object_id`` untouched.
        """
        from solvebio import Object

        folder_path = params.pop('path', None)
        has_parent = bool(folder_path) and folder_path != '/'
        if has_parent:
            parent = self._get_parent_folder(folder_path)
            params.update(parent_object_id=parent.id)

        # Fixed fields win over any same-named caller-supplied values.
        params.update(
            filename=filename,
            vault_id=self.id,
            object_type='folder',
        )
        return Object.create(client=self._client, **params)
Example #4
0
    def create_dataset(self, name, **params):
        """Create a dataset object named ``name`` in this vault.

        An optional ``path`` keyword names the parent folder. ``None`` or
        ``'/'`` places the dataset at the vault root
        (``parent_object_id=None``); any other value is resolved through
        ``self._get_parent_folder``. Remaining keyword arguments are
        forwarded to ``Object.create``.
        """
        from solvebio import Object

        path = params.pop('path', None)
        params['vault_id'] = self.id
        params['object_type'] = 'dataset'

        if path is None or path == '/':
            parent_id = None
        else:
            parent_id = self._get_parent_folder(path).id
        params['parent_object_id'] = parent_id

        params['filename'] = name
        return Object.create(**params)
Example #5
0
    def get_or_create_uploads_path(cls, **kwargs):
        """Return the path of the ``Uploads`` folder in the personal vault.

        The folder is looked up in the vault returned by
        ``cls.get_personal_vault`` and created there when the lookup raises
        ``NotFoundError``. An optional ``client`` keyword overrides the API
        client (falling back to ``cls._client``, then the module-level
        ``client``).
        """
        from solvebio import Object

        api_client = kwargs.pop('client', None) or cls._client or client
        personal_vault = cls.get_personal_vault(client=api_client)
        folder_name = 'Uploads'
        uploads_full_path = '{0}:/{1}'.format(
            personal_vault.full_path, folder_name)

        try:
            uploads = Object.get_by_full_path(
                uploads_full_path, assert_type='folder', client=api_client)
        except NotFoundError:
            print("Uploads directory not found. Creating {0}".format(
                uploads_full_path))
            uploads = Object.create(
                vault_id=personal_vault.id,
                object_type='folder',
                filename=folder_name,
                client=api_client,
            )

        return uploads.path
Example #6
0
    def get_or_create_uploads_path(cls, **kwargs):
        """Get (or create) the personal vault's ``Uploads`` folder path.

        Accepts an optional ``client`` keyword; without it ``cls._client``
        or the module-level ``client`` is used. Returns the ``path``
        attribute of the existing or newly created folder object.
        """
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        vault = cls.get_personal_vault(client=_client)
        uploads_name = 'Uploads'
        uploads_full_path = '{0}:/{1}'.format(vault.full_path, uploads_name)

        try:
            folder = Object.get_by_full_path(uploads_full_path,
                                             assert_type='folder',
                                             client=_client)
        except NotFoundError:
            # First use: the folder has not been created yet.
            notice = "Uploads directory not found. Creating {0}".format(
                uploads_full_path)
            print(notice)
            folder = Object.create(vault_id=vault.id,
                                   object_type='folder',
                                   filename=uploads_name,
                                   client=_client)

        return folder.path
Example #7
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Return the dataset at ``full_path``, creating it if necessary.

        The dataset is looked up first. If it is not found, the vault is
        resolved (and optionally created), any missing parent folders are
        created from the top down, and finally the dataset is created.

        Keyword arguments consumed here (all others are forwarded to
        ``Dataset.create``):
            client: API client to use. Falls back to ``cls._client`` and
                then to the module-level ``client``.
            create_vault: When True, get or create the vault named in
                ``full_path``. Defaults to False.
            create_folders: When True, create missing parent folders.
                Defaults to True.

        Raises:
            Exception: If the vault does not exist and ``create_vault`` is
                False, or if a parent folder is missing and
                ``create_folders`` is False.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path, assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path(
                '{0}:{1}'.format(parts['domain'], parts['vault']),
                client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            matches = vaults.solve_objects()
            if len(matches) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault'])
                )
            vault = matches[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        parent_path = os.path.dirname(object_path)
        curr_path = parent_path
        folders_to_create = []
        # Maps a folder path to its object id; the vault root has no id.
        id_map = {'/': None}

        # Walk upwards until an existing folder (or the root) is reached,
        # remembering every missing folder on the way (deepest first).
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Create the missing folders shallowest first so that each parent id
        # is already in id_map when its child is created.
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client
            )
            id_map[folder] = new_folder.id

        # id_map now holds the direct parent whether it is the root (None),
        # a pre-existing folder, or the deepest folder created above.
        parent_folder_id = id_map[parent_path]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
Example #8
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload a local file into a folder of a vault.

        Args:
            local_path: Path of the file to upload; ``~`` is expanded.
            remote_path: Path of the destination folder inside the vault.
            vault_full_path: Full path of the destination vault.
            **kwargs: ``client`` (API client override), plus ``description``
                and ``tags``, which are passed to ``Object.create``.

        Returns:
            The created file object; the already existing object when a file
            with the same md5 is found at the destination; or None when the
            local file is empty.

        Raises:
            FileUploadError: If the upload request does not return HTTP 200.
                The newly created object is deleted before raising.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        local_md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Check if object exists already and compare md5sums
        full_path, path_dict = Object.validate_full_path(
            os.path.join('{}:{}'.format(vault.full_path, remote_path),
                         os.path.basename(local_path)),
            client=_client)
        try:
            obj = cls.get_by_full_path(full_path, client=_client)
            if not obj.is_file:
                print('WARNING: A {} currently exists at {}'.format(
                    obj.object_type, full_path))
            else:
                # Check against md5sum of remote file
                if obj.md5 == local_md5:
                    print('WARNING: File {} (md5sum {}) already exists, '
                          'not uploading'.format(full_path, local_md5))
                    return obj
                else:
                    print('WARNING: File {} exists on SolveBio with different '
                          'md5sum (local: {} vs remote: {}) Uploading anyway, '
                          'but not overwriting.'.format(
                              full_path, local_md5, obj.md5))
        except NotFoundError:
            pass

        # Lookup parent object
        if path_dict['parent_path'] == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(path_dict['parent_full_path'],
                                                 assert_type='folder',
                                                 client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get('description')

        # Create the file, and upload it to the Upload URL
        obj = Object.create(vault_id=vault.id,
                            parent_object_id=parent_object_id,
                            object_type='file',
                            filename=os.path.basename(local_path),
                            md5=local_md5,
                            mimetype=mimetype,
                            size=size,
                            description=description,
                            tags=kwargs.get('tags', []) or [],
                            client=_client)

        print('Notice: File created for {0} at {1}'.format(
            local_path, obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(local_md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Use a session with a retry policy to handle connection errors.
        max_retries = 5
        retry = Retry(
            total=max_retries,
            read=max_retries,
            connect=max_retries,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504, 400),
        )
        # Context managers guarantee the session and the local file handle
        # are released even when the request raises.
        with requests.Session() as session:
            session.mount('https://',
                          requests.adapters.HTTPAdapter(max_retries=retry))
            with open(local_path, 'rb') as local_file:
                upload_resp = session.put(upload_url,
                                          data=local_file,
                                          headers=headers)

        if upload_resp.status_code != 200:
            print('WARNING: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code))
            # Clean up the failed upload
            obj.delete(force=True)
            raise FileUploadError(upload_resp.content)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(
                local_path, obj.path))

        return obj
Example #9
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Return the object at ``full_path``, creating it if necessary.

        The object is looked up first. If it is not found, the vault is
        resolved (and optionally created), missing parent folders are created
        from the top down, and the object itself is created last.

        Keyword arguments consumed here:
            client: API client to use. Falls back to ``cls._client`` and
                then to the module-level ``client``.
            create_vault: When True, get or create the vault named in
                ``full_path``. Defaults to False.
            create_folders: When True, create missing parent folders.
                Defaults to True.
            assert_type: Type assertion passed to the lookup. Defaults to
                the value of ``object_type`` when that is given.

        ``object_type`` is read but left in ``kwargs``; it and all remaining
        keyword arguments are forwarded to ``Object.create``.

        Raises:
            Exception: If the object must be created and ``object_type`` is
                missing, if the vault does not exist and ``create_vault`` is
                False, or if a parent folder is missing and
                ``create_folders`` is False.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        # Check for object type assertion, if not explicitly added, see
        # if user has passed object_type, as their intent was to get/create
        # an object of that type.
        assert_type = kwargs.pop('assert_type',
                                 kwargs.get('object_type', None))

        try:
            return cls.get_by_full_path(full_path,
                                        assert_type=assert_type,
                                        client=_client)
        except NotFoundError:
            pass

        # Object type required when creating Object
        object_type = kwargs.get('object_type')
        if not object_type:
            raise Exception("'object_type' is required when creating a new "
                            "Object. Pass one of: file, folder, dataset")

        # TODO should we require file contents?
        # Technically a user could then use this object to the call
        # upload_file()
        # if object_type == 'file' and not kwargs.get('content'):
        #     raise Exception('')

        # Object not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path(
                '{0}:{1}'.format(parts['domain'], parts['vault']),
                client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            matches = vaults.solve_objects()
            if len(matches) == 0:
                raise Exception('Vault with name {0}:{1} does not exist. Pass '
                                'create_vault=True to auto-create'.format(
                                    parts['domain'], parts['vault']))
            vault = matches[0]

        # Create the folders to hold the object if they do not already exist.
        object_path = parts['path']
        parent_path = os.path.dirname(object_path)
        curr_path = parent_path
        folders_to_create = []
        # Maps a folder path to its object id; the vault root has no id.
        id_map = {'/': None}

        # Walk upwards until an existing folder (or the root) is reached,
        # remembering every missing folder on the way (deepest first).
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Create the missing folders shallowest first so that each parent id
        # is already in id_map when its child is created.
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            id_map[folder] = new_folder.id

        # id_map now holds the direct parent whether it is the root (None),
        # a pre-existing folder, or the deepest folder created above.
        parent_folder_id = id_map[parent_path]

        return Object.create(filename=os.path.basename(object_path),
                             vault_id=vault.id,
                             parent_object_id=parent_folder_id,
                             client=_client,
                             **kwargs)
Example #10
0
def _upload_folder(domain, vault, base_remote_path,
                   base_local_path, local_start):
    """Mirror a local directory tree into a remote vault folder.

    Ensures that a folder named ``local_start`` exists under
    ``base_remote_path``, then walks ``base_local_path``, creating every
    remote sub-folder that does not exist yet and uploading every file that
    is not already present on the remote.

    Args:
        domain: Not used by this function.
        vault: Vault object; its ``id`` and ``full_path`` are used.
        base_remote_path: Full path of the remote folder that receives the
            tree.
        base_local_path: Local directory to walk.
        local_start: Name of the root folder to create on the remote
            (presumably the basename of ``base_local_path`` - confirm with
            the caller).
    """
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=local_start
        )

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # The local parent directory is stripped from every walked path. It is
    # escaped so that regex metacharacters in directory names are matched
    # literally, and compiled once because it never changes.
    local_prefix = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # Path of ``root`` relative to the parent of base_local_path
        rel_root = local_prefix.sub('', root).lstrip('/')

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(base_remote_path, rel_root, d)

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'
                      .format(os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(base_remote_path, rel_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(file_full_path)
                parent = Object.get_by_full_path(
                    parent_full_path, assert_type='folder')
                Object.upload_file(os.path.join(root, f),
                                   parent.path, vault.full_path)
Example #11
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        """Get the dataset at ``full_path`` or create it when it is missing.

        When the lookup raises ``NotFoundError`` the vault is resolved
        (optionally created), missing parent folders are created from the
        shallowest to the deepest, and the dataset is then created inside
        the deepest folder.

        Keyword arguments consumed here (the rest go to ``Dataset.create``):
            client: API client override; defaults to ``cls._client`` and
                then the module-level ``client``.
            create_vault: Create the vault if needed. Defaults to False.
            create_folders: Create missing parent folders. Defaults to True.

        Raises:
            Exception: If the vault is missing and ``create_vault`` is
                False, or a parent folder is missing and ``create_folders``
                is False.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path,
                                            assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault_full_path = '{0}:{1}'.format(parts['domain'], parts['vault'])
            vault = Vault.get_or_create_by_full_path(vault_full_path,
                                                     client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            found = vaults.solve_objects()
            if len(found) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault']))
            vault = found[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        parent_path = os.path.dirname(object_path)
        curr_path = parent_path
        folders_to_create = []
        # Folder path -> object id. The vault root is represented by None.
        id_map = {'/': None}

        # Climb towards the root until an existing folder is found, queueing
        # each missing folder (deepest first).
        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders'.format(curr_path))

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        # Parents must exist before children, so create in reverse order.
        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            id_map[folder] = new_folder.id

        # The direct parent is always in id_map by now: the root (None), a
        # folder that already existed, or the last folder created above.
        parent_folder_id = id_map[parent_path]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
Example #12
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload a local file into a folder of a vault.

        Args:
            local_path: Path of the file to upload; ``~`` is expanded.
            remote_path: Path of the destination folder inside the vault;
                ``'/'`` means the vault root.
            vault_full_path: Full path of the destination vault.
            **kwargs: ``client`` (API client override) and ``description``
                (defaults to ``'File uploaded via python client'``).

        Returns:
            The created file object, or None when the local file is empty.
            NOTE: when the upload request does not return HTTP 200 the
            object is deleted remotely but is still returned.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Lookup parent object
        if remote_path == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(
                ':'.join([vault.full_path, remote_path]),
                assert_type='folder',
                client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get('description',
                                 'File uploaded via python client')

        # Create the file, and upload it to the Upload URL
        obj = Object.create(vault_id=vault.id,
                            parent_object_id=parent_object_id,
                            object_type='file',
                            filename=os.path.basename(local_path),
                            md5=md5,
                            mimetype=mimetype,
                            size=size,
                            description=description,
                            client=_client)

        print('Notice: File created for {0} at {1}'.format(
            local_path, obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Open the file in a context manager so the handle is closed even
        # when the request raises.
        with open(local_path, 'rb') as local_file:
            upload_resp = requests.put(upload_url,
                                       data=local_file,
                                       headers=headers)

        if upload_resp.status_code != 200:
            print('Notice: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code))
            print('See error message below:')
            print(upload_resp.content)
            # Clean up the failed upload
            obj.delete(force=True)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(
                local_path, obj.path))

        return obj
Example #13
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        """Upload a local file into a folder of a vault, retrying on errors.

        Args:
            local_path: Path of the file to upload; ``~`` is expanded.
            remote_path: Path of the destination folder inside the vault;
                ``'/'`` means the vault root.
            vault_full_path: Full path of the destination vault.
            **kwargs: ``client`` (API client override) and ``description``
                (defaults to ``'File uploaded via python client'``).

        Returns:
            The created file object, or None when the local file is empty.
            NOTE: when the upload request does not return HTTP 200 the
            object is deleted remotely but is still returned.
        """
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Lookup parent object
        if remote_path == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(
                ':'.join([vault.full_path, remote_path]),
                assert_type='folder', client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get(
            'description',
            'File uploaded via python client'
        )

        # Create the file, and upload it to the Upload URL
        obj = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='file',
            filename=os.path.basename(local_path),
            md5=md5,
            mimetype=mimetype,
            size=size,
            description=description,
            client=_client
        )

        print('Notice: File created for {0} at {1}'.format(local_path,
                                                           obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Use a session with a retry policy to handle connection errors.
        # Both the session and the file handle are context-managed so they
        # are released even when the request raises.
        with requests.Session() as session:
            session.mount('https://',
                          requests.adapters.HTTPAdapter(max_retries=5))
            with open(local_path, 'rb') as local_file:
                upload_resp = session.put(upload_url,
                                          data=local_file,
                                          headers=headers)

        if upload_resp.status_code != 200:
            print('Notice: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code
            ))
            print('See error message below:')
            print(upload_resp.content)
            # Clean up the failed upload
            obj.delete(force=True)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(local_path,
                                                                    obj.path))

        return obj
Example #14
0
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Upload a local directory tree into a remote vault folder.

    First makes sure a folder named ``local_start`` exists under
    ``base_remote_path``. Then every directory below ``base_local_path`` is
    created on the remote if it is missing, and every file that cannot be
    found on the remote is uploaded into its matching folder.

    Args:
        domain: Not used by this function.
        vault: Vault object; its ``id`` and ``full_path`` are used.
        base_remote_path: Full path of the remote folder that receives the
            tree.
        base_local_path: Local directory to walk.
        local_start: Name of the root folder to create on the remote
            (presumably the basename of ``base_local_path`` - confirm with
            the caller).
    """
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(vault_id=vault.id,
                                   parent_object_id=parent_object_id,
                                   object_type='folder',
                                   filename=local_start)

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # Pattern that strips the local parent directory from walked paths.
    # re.escape keeps regex metacharacters in directory names literal; the
    # pattern is loop-invariant, so it is compiled only once.
    strip_local_parent = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # ``root`` expressed relative to the parent of base_local_path
        relative_root = strip_local_parent.sub('', root).lstrip('/')

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(base_remote_path, relative_root, d)

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(parent_full_path,
                                                     assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'.format(
                    os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(base_remote_path, relative_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(file_full_path)
                parent = Object.get_by_full_path(parent_full_path,
                                                 assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)