Ejemplo n.º 1
0
class GalaxyFileSystem():
    def __init__(self, url, user):
        u = urlsplit(url)
        if u.scheme != 'http' and u.scheme != 'https':
            raise ValueError("Invalid name node address")

        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.localdir = ""
        self.prefix = 'GalaxyFS'
        self.lddaprefix = 'Libraries'
        self.hdaprefix = 'Histories'
        self.client = GalaxyInstance(self.url, user)

    def normalize_path(self, path):
        path = os.path.normpath(path)
        if path.startswith(self.prefix):
            path = path[len(self.prefix):]
        while path and path[0] == os.sep:
            path = path[1:]
        return os.path.join(self.localdir, path)

    def strip_root(self, path):
        if path.startswith(self.url):
            path = path[len(self.url):]
            if not path.startswith(self.localdir):
                raise 'Invalid hdfs path. It must start with the root directory'

        if not path.startswith(self.localdir):
            return path
        return path[len(self.localdir):]

    def make_fullpath(self, path):
        path = self.normalize_path(path)
        return os.path.join(self.prefix, path)

    def create_folder(self, path):
        try:
            path = self.normalize_path(path)
            parts = pathlib.Path(normalized_path).parts
            if len(parts) > 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                id = self.client.libraries.create_library(parts[-1])
            else:
                id = self.client.histories.create_history(parts[-1])
            parts[-1] = id

            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except:
            return None
        return path

    def remove(self, path):
        try:
            path = self.normalize_path(path)
            parts = pathlib.Path(normalized_path).parts
            if len(parts) == 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                id = self.client.libraries.delete_library(library_id=parts[-1])
            else:
                id = self.client.histories.delete_history(history_id=parts[-1])
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            self.client.rename(oldpath, newpath)
        except Exception as e:
            print(e)

    def get_files(self, path):
        path = self.normalize_path(path)
        files = []
        for f in self.client.list(path):
            status = self.client.status(join(path, f), False)
            if status['type'] != "DIRECTORY":
                files.append(f)
        return files

    def get_folders(self, path):
        try:
            path = self.normalize_path(path)
            parts = pathlib.Path(normalized_path).parts
            if len(parts) > 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                id = self.client.libraries.create_library(parts[-1])
            else:
                id = self.client.histories.create_history(parts[-1])
            parts[-1] = id

            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except:
            return []
        return path

    def exists(self, path):
        return self.isdir(path) or self.ispath(path)

    def isdir(self, path):
        path = self.normalize_path(path)
        return path == self.lddaprefix or path == self.hdaprefix

    def isfile(self, path):
        return not self.isdir(path) and self.name_from_id(path)

    def read(self, path):
        path = self.normalize_path(path)
        with self.client.read(path) as reader:
            return reader.read().decode('utf-8')

    def write(self, path, content):
        path = self.normalize_path(path)
        self.client.write(path, content)

    def name_from_id(self, path):
        normalized_path = self.normalize_path(path)
        parts = pathlib.Path(normalized_path).parts
        if len(parts) == 0:
            return ""
        elif len(parts) == 1:
            return self.lddaprefix if parts[
                0] == self.lddaprefix else self.hdaprefix
        elif len(parts) == 2:
            info = self.client.libraries.get_libraries(
                library_id=parts[1]
            )[0] if parts[
                0] == self.lddaprefix else self.client.histories.get_histories(
                    history_id=parts[1])[0]
        else:
            hda_or_ldda = 'ldda' if parts[0] == self.lddaprefix else 'hda'
            info = self.client.datasets.show_dataset(
                dataset_id=os.path.basename(normalized_path),
                hda_ldda=hda_or_ldda)

        if info:
            return info['name']

    def make_json(self, path):
        normalized_path = self.normalize_path(path)
        if not normalized_path:
            return [
                self.make_json(self.lddaprefix),
                self.make_json(self.hdaprefix)
            ]
        else:
            data_json = {
                'path': os.path.join(self.url, normalized_path),
                'text': self.name_from_id(path)
            }
            parts = pathlib.Path(normalized_path).parts
            if parts[0] == self.lddaprefix:
                if len(parts) == 1:
                    data_json['folder'] = True
                    libraries = self.client.libraries.get_libraries()
                    data_json['nodes'] = [
                        self.make_json(os.path.join(path, fn['id']))
                        for fn in libraries
                    ]
                elif len(parts) == 2:
                    data_json['folder'] = True
                    #library = self.client.libraries.get_libraries(library_id = parts[1])
                    #data_json['nodes'] = [self.make_json(os.path.join(path, fn['id'])) for fn in libraries]
            elif parts[0] == self.hdaprefix:
                if len(parts) == 1:
                    data_json['folder'] = True
                    histories = self.client.histories.get_histories()
                    data_json['nodes'] = [
                        self.make_json(os.path.join(path, fn['id']))
                        for fn in histories
                    ]
                elif len(parts) == 2:
                    data_json['folder'] = True
                    datasets = self.client.histories.show_matching_datasets(
                        parts[1])
                    data_json['nodes'] = [
                        self.make_json(os.path.join(path, fn['id']))
                        for fn in datasets
                    ]
            return data_json

    def save_upload(self, file, fullpath):
        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(fullpath))
        if os.path.isfile(localpath):
            os.remove(localpath)
        try:
            file.save(localpath)
            if isfile(fullpath):
                fullpath = os.path.dirname(fullpath)
            self.client.upload(self.normalize_path(fullpath), localpath, True)
        except:
            pass

    def download(self, path):
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        if status is not None and status['type'] == "FILE":
            localpath = os.path.join(tempfile.gettempdir(),
                                     os.path.basename(path))
            return self.client.download(path, localpath, True)
        else:
            return None
Ejemplo n.º 2
0
class GalaxyFileSystem():
    urlKey = 0
    hlddTitleKey = 1
    hlddKey = 2
    folderKey = 3
    hdaKey = 3
    lddaKey = 4

    def __init__(self, url, user):
        u = urlsplit(url)
        if u.scheme != 'http' and u.scheme != 'https':
            raise ValueError("Invalid name node address")

        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.localdir = ""
        self.prefix = 'GalaxyFS'
        self.lddaprefix = 'Libraries'
        self.hdaprefix = 'Histories'
        self.client = GalaxyInstance(self.url, user)

    def typename(self):
        return "gfs"

    def strip_prefix(self, path):
        return path[len(self.prefix):] if self.prefix and path.startswith(
            self.prefix) else path

    def normalize_path(self, path):
        if self.prefix:
            path = self.strip_prefix(path)

        if self.url and path.startswith(self.url):
            return path

        if not self.localdir or path.startswith(self.localdir):
            return os.path.join(self.url, path)

        while path and path[0] == os.sep:
            path = path[1:]
        return os.path.join(self.url, self.localdir, path)

    def normalize_fullpath(self, path):
        return self.normalize_path(path)

    def strip_root(self, path):
        path = self.strip_prefix(path)
        if path.startswith(self.url):
            path = path[len(self.url):]
            if not path.startswith(self.localdir):
                raise 'Invalid hdfs path. It must start with the root directory'
        return path[len(self.localdir):] if path.startswith(
            self.localdir) else path

    def make_fullpath(self, path):
        path = self.normalize_path(path)
        return os.path.join(self.prefix, path)

    def makedirs(self, path):
        return self.mkdir(path)

    def mkdir(self, path):
        try:
            path = self.normalize_path(path)
            parts = self.path_parts(path)

            if len(parts) > 4 or len(parts) < 3:
                return ""  #raise ValueError("Galaxy path may have maximum 4 parts.")
            hd_ldd = ''
            if len(parts) == 3:
                hd_ldd = self.client.libraries.create_library(
                    parts[-1]) if self.islibrary(
                        parts[GalaxyFileSystem.hlddTitleKey]
                    ) else self.client.histories.create_history(parts[-1])
            elif len(parts) == 4:
                if not self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                    return ""
                hd_ldd = self.client.libraries.create_folder(
                    parts[GalaxyFileSystem.lddaKey], parts[-1])

            parts[-1] = hd_ldd['id']

            return os.sep.join(parts['id'])
        except:
            return None

    def unique_fs_name(self, path, prefix, ext):
        return os.path.join(path, prefix + "_" + str(uuid.uuid4()) + ext)

    def remove(self, path):
        try:
            path = self.normalize_path(path)
            parts = self.path_parts(path)
            if len(parts) > 4:
                raise ValueError("Galaxy path may have maximum 4 parts.")
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                self.client.libraries.delete_library(library_id=parts[-1])
            else:
                self.client.histories.delete_history(history_id=parts[-1])
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            self.client.rename(oldpath, newpath)
            return self.strip_root(newpath)
        except Exception as e:
            print(e)

    def copyfile(self, src, dst):
        if self.islibrarydata(src) and not self.islibrarydata(dst):
            parts = self.path_parts(self.normalize_path(src))
            self.client.libraries.copy_from_dataset(
                parts[GalaxyFileSystem.hlddKey], self.id_from_path(dst),
                parts[GalaxyFileSystem.folderKey])
        elif not self.islibrary(parts[
                GalaxyFileSystem.hlddTitleKey]) and self.islibrarydata(dst):
            parts = self.path_parts(self.normalize_path(dst))
            self.client.histories.upload_dataset_from_library(
                parts[GalaxyFileSystem.hdaKey], self.id_from_path(src))
        else:
            content = self.read(src)
            if self.isdir(dst):
                dst = os.path.join(dst, os.path.basename(src))
                self.write(dst, content)
        return self.normalize_fullpath(dst)

    def get_files(self, path):
        path = self.normalize_path(path)
        files = []
        for f in self.client.list(path):
            status = self.client.status(join(path, f), False)
            if status['type'] != "DIRECTORY":
                files.append(f)
        return files

    def get_folders(self, path):
        try:
            normalized_path = self.normalize_path(path)
            parts = self.path_parts(normalized_path)
            if len(parts) > 4:
                raise ValueError("Galaxy path may have maximum 4 parts.")

            parts[-1] = self.client.libraries.create_library(
                parts[-1]) if self.islibrary(
                    parts[GalaxyFileSystem.hlddTitleKey]
                ) else self.client.histories.create_history(parts[-1])

            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except:
            return []
        return path

    def exists(self, path):
        return self.isdir(path) or self.isfile(path)

    def islibrary(self, name):
        return name == self.lddaprefix

    def islibrarydata(self, path):
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        return self.islibrary(parts[GalaxyFileSystem.hlddTitleKey])

    def isdir(self, path):
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        return len(parts) <= GalaxyFileSystem.lddaKey if self.islibrary(
            parts[GalaxyFileSystem.
                  hlddTitleKey]) else len(parts) <= GalaxyFileSystem.hdaKey

    def isfile(self, path):
        return not self.isdir(path) and self.name_from_id(path) != ""

    def join(self, path1, path2):
        path1 = self.normalize_path(path1)
        return os.path.join(path1, path2)

    def make_unique_dir(self, path):
        unique_dir = self.join(path, str(uuid.uuid4()))
        self.makedirs(unique_dir)
        return unique_dir

    def read(self, path):
        path = self.normalize_path(path)
        with self.client.read(path, 'rb') as reader:
            return reader.read()

    def write(self, path, content):
        path = self.normalize_path(path)
        self.client.write(path, content)

    def id_from_path(self, path):
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        if len(parts) <= GalaxyFileSystem.urlKey + 1:
            return ""
        elif len(parts
                 ) == GalaxyFileSystem.hlddTitleKey + 1:  #Histories/Libraries
            return parts[GalaxyFileSystem.hlddTitleKey]
        elif len(parts
                 ) == GalaxyFileSystem.hlddKey + 1:  #library-name/history-name
            info = self.client.libraries.get_libraries(
                library_id=parts[GalaxyFileSystem.hlddKey]
            )[0] if parts[
                GalaxyFileSystem.
                hlddTitleKey] == self.lddaprefix else self.client.histories.get_histories(
                    history_id=parts[GalaxyFileSystem.hlddKey])[0]
            return info['id']
        elif len(
                parts
        ) == GalaxyFileSystem.folderKey + 1:  #Folder(library)/Dataset(history)
            if parts[GalaxyFileSystem.hlddTitleKey] == self.lddaprefix:
                folder = self.client.folders.show_folder(parts[3], False)
                return folder['id']
            else:
                info = self.client.datasets.show_dataset(
                    dataset_id=parts[GalaxyFileSystem.hdaKey], hda_ldda='hda')
                return info['id']
        elif len(parts) == GalaxyFileSystem.lddaKey + 1:
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.lddaKey], hda_ldda='ldda')
            return info['id']

    def path_parts(self, path):
        parts = []
        if path.startswith(self.prefix):
            parts.append(self.prefix)
            if len(path) > len(self.prefix):
                path = path[len(self.prefix) + 1:]
        elif path.startswith(self.url):
            parts.append(self.url)
            if len(path) > len(self.url):
                path = path[len(self.url) + 1:]

        parts.extend(pathlib.Path(path).parts)
        return parts

    def name_from_id(self, path):
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        if len(parts) <= GalaxyFileSystem.urlKey + 1:
            return ""
        elif len(parts
                 ) == GalaxyFileSystem.hlddTitleKey + 1:  #Histories/Libraries
            return parts[GalaxyFileSystem.hlddTitleKey]
        elif len(parts
                 ) == GalaxyFileSystem.hlddKey + 1:  #library-name/history-name
            info = self.client.libraries.get_libraries(library_id=parts[
                GalaxyFileSystem.hlddKey])[0] if self.islibrary(
                    parts[GalaxyFileSystem.hlddTitleKey]
                ) else self.client.histories.get_histories(
                    history_id=parts[GalaxyFileSystem.hlddKey])[0]
            return info['name']
        elif len(
                parts
        ) == GalaxyFileSystem.folderKey + 1:  #Folder(library)/Dataset(history)
            if parts[GalaxyFileSystem.hlddTitleKey] == self.lddaprefix:
                folder = self.client.folders.show_folder(
                    parts[GalaxyFileSystem.folderKey], False)
                return folder['name']
            else:
                info = self.client.datasets.show_dataset(
                    dataset_id=parts[GalaxyFileSystem.hdaKey], hda_ldda='hda')
                return info['name']
        elif len(parts) == GalaxyFileSystem.lddaKey + 1:
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.lddaKey], hda_ldda='ldda')
            return info['name']

    def make_json_item(self, path):
        data_json = {
            'path':
            self.normalize_path(path),
            'text':
            "{0}(id:{1})".format(self.name_from_id(path),
                                 self.id_from_path(path))
        }
        if self.isdir(path):
            data_json['nodes'] = []
        return data_json

    def make_json(self, path):
        normalized_path = self.normalize_path(path)
        if not normalized_path or normalized_path == self.url:
            return [
                self.make_json_item(urljoin(self.url, self.lddaprefix)),
                self.make_json_item(urljoin(self.url, self.hdaprefix))
            ]
        else:
            data_json = self.make_json_item(path)
            parts = self.path_parts(normalized_path)
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                    libraries = self.client.libraries.get_libraries()
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in libraries
                    ]
                elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                    folders = self.client.libraries.get_folders(
                        library_id=parts[GalaxyFileSystem.hlddKey])
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in folders
                    ]
                elif len(parts) == GalaxyFileSystem.folderKey + 1:
                    folder = self.client.folders.show_folder(
                        parts[GalaxyFileSystem.folderKey], True)
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in folder['folder_contents']
                    ]
            else:
                if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                    histories = self.client.histories.get_histories()
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in histories
                    ]
                elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                    datasets = self.client.histories.show_matching_datasets(
                        parts[GalaxyFileSystem.hlddKey])
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in datasets
                    ]

            data_json['loaded'] = True
            return data_json

    @staticmethod
    def get_history_path(url, history_id, data_id):
        return urljoin(url, os.path.join('Histories', history_id, data_id))

    def make_json_r(self, path):
        normalized_path = self.normalize_path(path)
        if not normalized_path or normalized_path == self.url:
            return [
                self.make_json_r(urljoin(self.url, self.lddaprefix)),
                self.make_json_r(urljoin(self.url, self.hdaprefix))
            ]
        else:
            data_json = self.make_json_item(path)
            parts = self.path_parts(normalized_path)
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                    libraries = self.client.libraries.get_libraries()
                    data_json['nodes'] = [
                        self.make_json_r(os.path.join(path, fn['id']))
                        for fn in libraries
                    ]
                elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                    folders = self.client.libraries.get_folders(
                        library_id=parts[GalaxyFileSystem.hlddKey])
                    data_json['nodes'] = [
                        self.make_json_r(os.path.join(path, fn['id']))
                        for fn in folders
                    ]
                elif len(parts) == GalaxyFileSystem.folderKey + 1:
                    folder = self.client.folders.show_folder(
                        parts[GalaxyFileSystem.folderKey], True)
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in folder['folder_contents']
                    ]
            else:
                if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                    histories = self.client.histories.get_histories()
                    data_json['nodes'] = [
                        self.make_json_r(os.path.join(path, fn['id']))
                        for fn in histories
                    ]
                elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                    datasets = self.client.histories.show_matching_datasets(
                        parts[GalaxyFileSystem.hlddKey])
                    data_json['nodes'] = [
                        self.make_json_item(os.path.join(path, fn['id']))
                        for fn in datasets
                    ]

            data_json['loaded'] = True
            return data_json

    def save_upload(self, file, path):
        if self.isfile(path):
            path = os.path.dirname(path)
        elif not self.isdir(path):
            return ""
        parts = self.path_parts(path)
        if not parts or len(parts) < 3:
            return ""

        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(file.filename))

        if os.path.exists(localpath):
            fs = PosixFileSystem('/')
            unique_dir = fs.make_unique_dir(os.path.dirname(localpath))
            localpath = os.path.join(unique_dir,
                                     os.path.basename(file.filename))

        try:
            file.save(localpath)

            dataset = ''
            if self.islibrary(parts[1]):
                dataset = self.client.libraries.upload_file_from_local_path(
                    parts[2],
                    localpath,
                    folder_id=parts[3] if len(parts) > 3 else None)
            else:
                dataset = self.client.tools.upload_file(localpath, parts[2])

            if dataset:
                return os.path.join(path, dataset['id'])
        except:
            pass

    def download(self, path):
        path = self.normalize_path(path)
        if self.isdir(path):
            return None

        dataset = self.client.datasets.show_dataset(
            dataset_id=os.path.basename(path),
            hda_ldda='ldda' if self.islibrarydata(path) else 'hda')
        name = dataset['name']
        if not pathlib.Path(name).suffix and dataset['file_ext']:
            name += '.' + dataset['file_ext']

        localpath = os.path.join(tempfile.gettempdir(), name)

        if os.path.exists(localpath):
            fs = PosixFileSystem('/')
            unique_dir = fs.make_unique_dir(os.path.dirname(localpath))
            localpath = os.path.join(unique_dir, name)

        self.client.datasets.download_dataset(os.path.basename(path),
                                              file_path=localpath,
                                              use_default_filename=False)
        return localpath