コード例 #1
0
class GalaxyFileSystem():
    """Filesystem-like facade over a Galaxy server.

    Paths are made of up to three components below an optional
    ``GalaxyFS`` prefix: a root (``Libraries`` or ``Histories``), the id of
    a library/history, and the id of a dataset.

    NOTE(review): ``rename``, ``get_files``, ``read``, ``write``,
    ``save_upload`` and ``download`` call ``rename``/``list``/``status``/
    ``read``/``write``/``upload``/``download`` directly on ``self.client``.
    Those look like methods of an HDFS-style client rather than of the
    Galaxy client created in ``__init__`` -- confirm they exist before
    relying on these methods.
    """

    def __init__(self, url, user):
        """Connect to the Galaxy server at *url*.

        Args:
            url: ``http``/``https`` address of the server; only the scheme
                and network location are kept.
            user: Passed as the second positional argument to
                ``GalaxyInstance`` (presumably the API key -- verify).

        Raises:
            ValueError: If the URL scheme is neither ``http`` nor ``https``.
        """
        u = urlsplit(url)
        if u.scheme not in ('http', 'https'):
            raise ValueError("Invalid name node address")

        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.localdir = ""
        self.prefix = 'GalaxyFS'
        self.lddaprefix = 'Libraries'
        self.hdaprefix = 'Histories'
        self.client = GalaxyInstance(self.url, user)

    def normalize_path(self, path):
        """Return *path* relative to the root, without the ``GalaxyFS`` prefix.

        The empty path, ``'.'``, ``'/'`` and the bare prefix all normalize
        to ``''`` (when ``localdir`` is empty).
        """
        path = os.path.normpath(path)
        # normpath('') yields '.', which must still mean "the root".
        if path == os.curdir:
            path = ''
        # Strip the prefix only at a component boundary, so a name that
        # merely begins with the prefix text is left intact.
        if path == self.prefix or path.startswith(self.prefix + os.sep):
            path = path[len(self.prefix):]
        path = path.lstrip(os.sep)
        return os.path.join(self.localdir, path)

    def strip_root(self, path):
        """Remove the server URL and the local root directory from *path*.

        Raises:
            ValueError: If *path* starts with the server URL but the
                remainder does not start with the local root directory.
        """
        if path.startswith(self.url):
            path = path[len(self.url):]
            if not path.startswith(self.localdir):
                raise ValueError(
                    'Invalid hdfs path. It must start with the root directory'
                )
            return path[len(self.localdir):]

        if path.startswith(self.localdir):
            return path[len(self.localdir):]
        return path

    def make_fullpath(self, path):
        """Return the normalized *path* with the ``GalaxyFS`` prefix prepended."""
        path = self.normalize_path(path)
        return os.path.join(self.prefix, path)

    def create_folder(self, path):
        """Create a library or history named after the last component of *path*.

        Paths rooted at ``Libraries`` create a library; any other root
        creates a history.

        Returns:
            The prefixed path in which the last component is replaced by
            the id of the created object, or ``None`` on any failure.
        """
        try:
            parts = list(pathlib.Path(self.normalize_path(path)).parts)
            if len(parts) > 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if len(parts) < 2:
                raise ValueError("Galaxy path must name the folder to create.")
            if parts[0] == self.lddaprefix:
                created = self.client.libraries.create_library(parts[-1])
            else:
                created = self.client.histories.create_history(parts[-1])
            # Assumes the creation calls return a dict carrying the new
            # object's 'id' -- verify against the client library.
            parts[-1] = created['id']
            return self.make_fullpath(os.sep.join(parts))
        except Exception as e:
            print(e)
            return None

    def remove(self, path):
        """Delete the library or history addressed by ``<root>/<id>``.

        Errors (including a path of any other shape) are printed, not raised.
        """
        try:
            parts = pathlib.Path(self.normalize_path(path)).parts
            # Only <root>/<id> identifies a library or history; any other
            # shape would hand a wrong id to the delete call.
            if len(parts) != 2:
                raise ValueError(
                    "Only a library or history path (<root>/<id>) can be removed."
                )
            if parts[0] == self.lddaprefix:
                self.client.libraries.delete_library(library_id=parts[-1])
            else:
                self.client.histories.delete_history(history_id=parts[-1])
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        """Rename *oldpath* to *newpath*; errors are printed, not raised."""
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            # NOTE(review): relies on self.client.rename -- confirm it exists.
            self.client.rename(oldpath, newpath)
        except Exception as e:
            print(e)

    def get_files(self, path):
        """Return the entries under *path* whose status type is not ``DIRECTORY``."""
        path = self.normalize_path(path)
        # NOTE(review): relies on self.client.list/status -- confirm they exist.
        return [
            f for f in self.client.list(path)
            if self.client.status(os.path.join(path, f), False)['type'] != "DIRECTORY"
        ]

    def get_folders(self, path):
        """Return the names of the folders directly below *path*.

        The root lists the two root names, ``Libraries`` lists library ids
        and ``Histories`` lists history ids. Any other path, and any
        failure, yields an empty list. Nothing is created on the server.
        """
        try:
            parts = pathlib.Path(self.normalize_path(path)).parts
            if not parts:
                return [self.lddaprefix, self.hdaprefix]
            if len(parts) != 1:
                return []
            if parts[0] == self.lddaprefix:
                entries = self.client.libraries.get_libraries()
            elif parts[0] == self.hdaprefix:
                entries = self.client.histories.get_histories()
            else:
                return []
            return [entry['id'] for entry in entries]
        except Exception as e:
            print(e)
            return []

    def exists(self, path):
        """Return whether *path* is a known directory or a named object."""
        return self.isdir(path) or self.isfile(path)

    def isdir(self, path):
        """Return whether *path* is one of the two root names."""
        path = self.normalize_path(path)
        return path == self.lddaprefix or path == self.hdaprefix

    def isfile(self, path):
        """Return whether *path* is not a root and resolves to a non-empty name."""
        return not self.isdir(path) and bool(self.name_from_id(path))

    def read(self, path):
        """Return the content at *path* decoded as UTF-8."""
        path = self.normalize_path(path)
        # NOTE(review): relies on self.client.read -- confirm it exists.
        with self.client.read(path) as reader:
            return reader.read().decode('utf-8')

    def write(self, path, content):
        """Write *content* to *path*."""
        path = self.normalize_path(path)
        # NOTE(review): relies on self.client.write -- confirm it exists.
        self.client.write(path, content)

    def name_from_id(self, path):
        """Return the display name of the object addressed by *path*.

        Returns ``""`` for the root, the root name for a one-component
        path, the ``'name'`` of the looked-up library/history/dataset
        otherwise, and ``None`` when the lookup yields nothing.
        """
        normalized_path = self.normalize_path(path)
        parts = pathlib.Path(normalized_path).parts
        if not parts:
            return ""

        is_library = parts[0] == self.lddaprefix
        if len(parts) == 1:
            return self.lddaprefix if is_library else self.hdaprefix

        if len(parts) == 2:
            if is_library:
                matches = self.client.libraries.get_libraries(
                    library_id=parts[1])
            else:
                matches = self.client.histories.get_histories(
                    history_id=parts[1])
            # An unknown id may yield an empty result; do not index into it.
            info = matches[0] if matches else None
        else:
            info = self.client.datasets.show_dataset(
                dataset_id=os.path.basename(normalized_path),
                hda_ldda='ldda' if is_library else 'hda')

        return info['name'] if info else None

    def make_json(self, path):
        """Return a tree description of *path*.

        For the root this is a two-element list (libraries tree, histories
        tree). Otherwise it is a dict with ``'path'`` and ``'text'``, plus
        ``'folder'`` for one- and two-component paths under a known root
        and ``'nodes'`` (recursively built children) where children are
        listed.
        """
        normalized_path = self.normalize_path(path)
        if not normalized_path:
            return [
                self.make_json(self.lddaprefix),
                self.make_json(self.hdaprefix),
            ]

        data_json = {
            'path': os.path.join(self.url, normalized_path),
            'text': self.name_from_id(path),
        }
        parts = pathlib.Path(normalized_path).parts
        root, depth = parts[0], len(parts)
        if root not in (self.lddaprefix, self.hdaprefix) or depth > 2:
            return data_json

        data_json['folder'] = True
        if depth == 1:
            if root == self.lddaprefix:
                children = self.client.libraries.get_libraries()
            else:
                children = self.client.histories.get_histories()
        elif root == self.hdaprefix:
            children = self.client.histories.show_matching_datasets(parts[1])
        else:
            # TODO: listing the contents of a library is not implemented,
            # so a library is reported as a folder without 'nodes'.
            return data_json

        data_json['nodes'] = [
            self.make_json(os.path.join(path, child['id']))
            for child in children
        ]
        return data_json

    def save_upload(self, file, fullpath):
        """Save *file* to a temporary location and upload it to *fullpath*.

        If *fullpath* resolves to an existing named object, its parent
        path is used as the upload target. Errors are printed, not raised.
        """
        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(fullpath))
        if os.path.isfile(localpath):
            os.remove(localpath)
        try:
            file.save(localpath)
            if self.isfile(fullpath):
                fullpath = os.path.dirname(fullpath)
            # NOTE(review): relies on self.client.upload -- confirm it exists.
            self.client.upload(self.normalize_path(fullpath), localpath, True)
        except Exception as e:
            print(e)

    def download(self, path):
        """Download *path* into the temp directory if its status type is ``FILE``.

        Returns:
            Whatever ``self.client.download`` returns, or ``None`` when the
            path has no status or is not a file.
        """
        path = self.normalize_path(path)
        # NOTE(review): relies on self.client.status/download -- confirm they exist.
        status = self.client.status(path, False)
        if status is None or status['type'] != "FILE":
            return None
        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(path))
        return self.client.download(path, localpath, True)