def get(datasets_identifiers, identifier_type='hid', history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file[s] from the history and store them under /import/.
    Return value[s] are the path[s] to the dataset[s] stored under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object version of bioblend is too slow in retrieving all datasets
    # from a history; fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_paths = []
    for dataset_identifier in datasets_identifiers:
        file_path = '/import/%s' % dataset_identifier
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_identifier)
        # Cache the file requests. E.g. if someone does something silly like
        # calling get() for a Galaxy file in a for-loop, we don't want to
        # re-download every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if identifier_type == 'hid':
                dataset_identifier = int(dataset_identifier)
            dc.download_dataset(datasets[dataset_identifier], file_path=file_path,
                                use_default_filename=False)
        else:
            log.debug('Cached, not re-downloading')
        file_paths.append(file_path)
    # Return a single path when one identifier was given, otherwise all paths.
    return file_paths[0] if len(file_paths) == 1 else file_paths
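# Hedged usage sketch for the multi-dataset get() above; the hid values are
# hypothetical, and the GIE container environment (HISTORY_ID set, datasets
# with these hids present in the history) is assumed.
def _example_get_by_hid():
    paths = get([1, 2], identifier_type='hid')
    print(paths)  # e.g. ['/import/1', '/import/2']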
def get(dataset_id, history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    Return value is the path to the dataset stored under /import/.
    """
    gi = get_galaxy_connection()
    hc = HistoryClient(gi)
    dc = DatasetClient(gi)
    file_path = '/import/%s' % dataset_id
    history_id = history_id or _get_history_id()
    # Cache the file requests. E.g. if someone does something silly like
    # calling get() for a Galaxy file in a for-loop, we don't want to
    # re-download every time and add that overhead.
    if not os.path.exists(file_path):
        dataset_mapping = dict(
            (dataset['hid'], dataset['id'])
            for dataset in hc.show_history(history_id, contents=True)
        )
        try:
            hc.download_dataset(history_id, dataset_mapping[dataset_id], file_path,
                                use_default_filename=False, to_ext=None)
        except Exception:
            dc.download_dataset(dataset_mapping[dataset_id], file_path,
                                use_default_filename=False)
    return file_path
def get(dataset_id, history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    Return value is the path to the dataset stored under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object version of bioblend is too slow in retrieving all datasets
    # from a history; fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_path = '/import/%s' % dataset_id
    log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_id)
    # Cache the file requests. E.g. if someone does something silly like
    # calling get() for a Galaxy file in a for-loop, we don't want to
    # re-download every time and add that overhead.
    if not os.path.exists(file_path):
        hc = HistoryClient(gi)
        dc = DatasetClient(gi)
        history = hc.show_history(history_id, contents=True)
        datasets = {ds['hid']: ds['id'] for ds in history}
        dc.download_dataset(datasets[dataset_id], file_path=file_path,
                            use_default_filename=False)
    else:
        log.debug('Cached, not re-downloading')
    return file_path
def get_user_history(history_id=None):
    """
    Get info on all visible datasets in the user's history.
    Return a list with one dict per dataset.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, visible=True, contents=True)
    return history
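# Hedged usage sketch for get_user_history(): print hid, name, and state of
# every visible dataset. Assumes the GIE container environment (HISTORY_ID
# set); the keys come from Galaxy's history-contents response.
def _example_list_history():
    for ds in get_user_history():
        print('%s\t%s\t%s' % (ds['hid'], ds['name'], ds['state']))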
def get(dataset_id):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    Return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient(gi)
    dc = DatasetClient(gi)
    file_path = '/import/%s' % dataset_id
    dataset_mapping = dict(
        (dataset['hid'], dataset['id'])
        for dataset in hc.show_history(conf['history_id'], contents=True)
    )
    try:
        hc.download_dataset(conf['history_id'], dataset_mapping[dataset_id], file_path,
                            use_default_filename=False, to_ext=None)
    except Exception:
        dc.download_dataset(dataset_mapping[dataset_id], file_path,
                            use_default_filename=False)
    return file_path
def get_workflow_status(user):
    # go through every galaxy instance
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    dj_wfs = Workflow.objects.all()

    # loop through instances
    status = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        hc = HistoryClient(gi)
        wfs = wc.get_workflows()
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            winvoke = wc.get_invocations(wf['id'])
            for wi in winvoke:
                wid = wc.show_invocation(wf['id'], wi['id'])
                h_l = hc.get_histories(wid['history_id'], deleted=True)
                if h_l:
                    h = h_l[0]
                else:
                    continue
                sd = get_status_d(wid)
                sd['name'] = wfd['name']
                hd = hc.show_history(h['id'])
                sd['history_name'] = h['name']
                datetime_object = datetime.strptime(hd['update_time'],
                                                    '%Y-%m-%dT%H:%M:%S.%f')
                # sd['history_url'] = '{}{}'.format(git.url, hd['url'])
                sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                sd['update_time_unix'] = unixtime(datetime_object)
                sd['galaxy_instance'] = git.name
                status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
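# Hedged usage sketch for get_workflow_status(): the keys used below mirror
# the ones set above; `user` is assumed to be a Django user with linked
# GalaxyInstanceTracking records.
def print_workflow_status(user):
    for sd in get_workflow_status(user):
        print('%s @ %s | history=%s | updated=%s' % (
            sd['name'], sd['galaxy_instance'],
            sd['history_name'], sd['update_time']))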
def get_history_status(user, hist_id=None):
    # go through every galaxy instance
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)

    # loop through instances
    status = []
    for git in gits:
        # loop through histories for that instance
        gi, gu = get_gi_gu(user, git)
        hc = HistoryClient(gi)
        hists = hc.get_histories()

        # loop through and create a list of dictionaries for our django table
        for hist in hists:
            sd = {}

            # add useful info
            if hist_id and hist['id'] != hist_id:
                continue
            history_info = hc.show_history(hist['id'])

            # add status info
            sd_bioblend = hc.get_status(hist['id'])
            state_details = sd_bioblend['state_details']
            sd.update(state_details)
            sd['estimated_progress'] = sd_bioblend['percent_complete']

            datetime_object = datetime.strptime(history_info['update_time'],
                                                '%Y-%m-%dT%H:%M:%S.%f')
            sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
            sd['update_time_unix'] = unixtime(datetime_object)
            sd['galaxy_instance'] = git.name
            sd['name'] = hist['name']

            hsq = History.objects.filter(galaxy_id=hist['id'],
                                         galaxyinstancetracking=git)
            if hsq:
                hs = hsq[0]
                hs.name = hist['name']
                hs.update_time = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                hs.empty = state_details['empty']
                hs.error = state_details['error']
                hs.failed_metadata = state_details['failed_metadata']
                hs.new = state_details['new']
                hs.ok = state_details['ok']
                hs.paused = state_details['paused']
                hs.running = state_details['running']
                hs.queued = state_details['queued']
                hs.setting_metadata = state_details['setting_metadata']
                hs.upload = state_details['upload']
                hs.estimated_progress = sd_bioblend['percent_complete']
            else:
                hs = History(
                    galaxyinstancetracking=git,
                    name=hist['name'],
                    update_time=datetime_object.strftime('%Y-%m-%d %H:%M:%S'),
                    empty=state_details['empty'],
                    error=state_details['error'],
                    failed_metadata=state_details['failed_metadata'],
                    new=state_details['new'],
                    ok=state_details['ok'],
                    paused=state_details['paused'],
                    running=state_details['running'],
                    queued=state_details['queued'],
                    setting_metadata=state_details['setting_metadata'],
                    upload=state_details['upload'],
                    galaxy_id=hist['id'],
                    estimated_progress=sd_bioblend['percent_complete'])
            hs.save()
            sd['history_data_bioblend_list'] = \
                '/galaxy/history_data_bioblend_list/{}'.format(hs.pk)
            status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
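# Hedged usage sketch for get_history_status(): fetch the status of a single
# history by its (hypothetical) Galaxy id and print the estimated progress.
def print_history_progress(user, galaxy_history_id):
    for sd in get_history_status(user, hist_id=galaxy_history_id):
        print('%s: %s%% complete (%s)' % (
            sd['name'], sd['estimated_progress'], sd['update_time']))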
class GalaxyHandler:
    '''
    This class represents a Galaxy instance and provides functions to
    interact with that instance.
    '''

    def __init__(self, url, api_key, container_file=None, oci_bundle=False):
        self.url = url
        self.api_key = api_key
        self.container_file = container_file
        self.oci_bundle = oci_bundle
        # Track whether the containerized instance is running
        self.instance_running = False
        # Bioblend GalaxyInstance
        self.instance = None
        # Bioblend clients
        self.user_client = None
        self.config_client = None
        self.workflow_client = None
        self.tool_client = None
        self.toolshed_client = None
        self.library_client = None
        self.roles_client = None
        self.history_client = None
        self.dataset_client = None

    def start_container_galaxy(self, writable=False, binds=None):
        '''
        Run a containerized Galaxy instance.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                subprocess.call(["sh", "/galaxy/run.sh",
                                 "--log-file", "/output/paster.log",
                                 "--pid-file", "/output/paster.pid",
                                 "--daemon"],
                                stdout=FNULL, stderr=subprocess.STDOUT)
            else:
                if writable:
                    subprocess.call(["sudo", "singularity", "exec", "-w",
                                     self.container_file,
                                     "sh", "/galaxy/run.sh", "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                elif binds:
                    subprocess.call(["singularity", "exec", "--bind", binds,
                                     self.container_file,
                                     "sh", "/galaxy/run.sh",
                                     "--log-file", "/output/paster.log",
                                     "--pid-file", "/output/paster.pid",
                                     "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                else:
                    subprocess.call(["singularity", "exec", self.container_file,
                                     "sh", "/galaxy/run.sh", "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)

        # Wait until the Galaxy instance is available, but do not wait
        # longer than 1 minute.
        response = None
        t = 0
        while not response:
            try:
                # returns 200 if Galaxy is up
                response = urllib.urlopen(self.url).getcode()
            except Exception:
                if t > 60:
                    logger.error("Galaxy is not up after 1 minute. Something went "
                                 "wrong. Maybe the container is corrupted. Try to "
                                 "open a shell in writable mode in the container "
                                 "and start Galaxy from the shell")
                    exit(1)
                else:
                    # Wait 5 s until Galaxy is up
                    logger.info("Galaxy is not up ... wait 5 seconds and try again")
                    t = t + 5
                    time.sleep(5)
                    response = None
                    continue
        self.instance_running = True
        return

    def stop_container_galaxy(self, sudo=False, bind_dirs=None, tmp_dir=None):
        '''
        Stop a running containerized Galaxy instance.
        Remove an existing temporary directory.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                # No binds, no Singularity, just plain run.sh --stop-daemon
                subprocess.call(["sh", "/galaxy/run.sh", "--stop-daemon"],
                                stdout=FNULL, stderr=subprocess.STDOUT)
                self.instance_running = False
                time.sleep(5)
            else:
                if sudo:
                    # We use sudo only for importing workflows, so no binds.
                    subprocess.call(["sudo", "singularity", "exec", "-w",
                                     self.container_file,
                                     "sh", "/galaxy/run.sh", "--stop-daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
                else:
                    # We use this only for workflow execution.
                    subprocess.call(["singularity", "exec", "--bind", bind_dirs,
                                     self.container_file,
                                     "sh", "/galaxy/run.sh",
                                     "--log-file", "/output/paster.log",
                                     "--pid-file", "/output/paster.pid",
                                     "--stop-daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
        # Remove temporary directories
        if tmp_dir:
            logger.info("Remove temporary directory: %s", tmp_dir)
            shutil.rmtree(tmp_dir)
        return

    def create_galaxy_instance(self, check_admin=False):
        '''
        Create a bioblend GalaxyInstance. If check_admin=True, check if the
        user is an admin of the Galaxy instance; if not, return None.
        Returns False if an error occurs.
        '''
        # Check if the URL is valid
        if not check_url(self.url):
            logger.error("URL to galaxy instance is not a valid URL: %s", self.url)
            return False
        # Try to create a bioblend Galaxy instance
        try:
            self.instance = GalaxyInstance(url=self.url, key=self.api_key)
        except Exception:
            logger.error("Cannot create Galaxy instance.")
            return False
        return True

    def create_clients(self):
        '''
        Create bioblend clients for the Galaxy instance.
        '''
        # Create the first client and check if the API works
        self.config_client = ConfigClient(self.instance)
        try:
            self.config_client.get_version()
            self.config_client.get_config()
        except Exception:
            logger.error("Provided API-key does not work.")
            return False
        try:
            self.user_client = UserClient(self.instance)
            self.workflow_client = WorkflowClient(self.instance)
            self.tool_client = ToolClient(self.instance)
            self.toolshed_client = ToolShedClient(self.instance)
            self.library_client = LibraryClient(self.instance)
            self.roles_client = RolesClient(self.instance)
            self.history_client = HistoryClient(self.instance)
            self.dataset_client = DatasetClient(self.instance)
        except Exception:
            logger.error("Error initializing other bioblend clients.")
            return False
        return True

    def initialize(self):
        '''
        Initialize the bioblend GalaxyInstance and clients, and check if the
        API works. Returns False if something went wrong.
        '''
        if not self.create_galaxy_instance():
            logger.error("Cannot create bioblend GalaxyInstance for the GalaxyHandler")
            return False
        if not self.create_clients():
            logger.error("Cannot create bioblend clients for the GalaxyHandler")
            return False
        return True

    def create_user(self, name, mail, password):
        '''
        Create a new Galaxy user for a specific Galaxy instance.
        Return the user_id and an API key.
        '''
        try:
            new_user = self.user_client.create_local_user(name, mail, password)
        except ConnectionError as e:
            # User already exists
            if "already exists" in e.body:
                new_user = self.user_client.get_users(f_email=mail)[0]
            else:
                raise
        new_user_id = new_user['id']
        # Create an API key for that user
        new_user_api_key = self.user_client.create_user_apikey(new_user_id)
        return (new_user_id, new_user_api_key)

    def create_input_library(self, name, user):
        '''
        Create a dataset library for this instance.
        '''
        try:
            # Create the library
            new_library = self.library_client.create_library(name, description=None,
                                                             synopsis=None)
            logger.info("new_library ok")
            # Get the role of the user
            user_role_id = self.roles_client.get_roles()[0]['id']
            logger.info("user_role_id ok")
            # Set permissions for that library. The following settings enable
            # the upload of input data by the user to this library.
            self.library_client.set_library_permissions(
                library_id=new_library['id'],
                access_in=user_role_id,
                modify_in=user_role_id,
                add_in=user_role_id,
                manage_in=user_role_id)
            return True
        except Exception:
            logger.error("Cannot create Galaxy data library")
            return False

    def create_history(self, name):
        '''
        Create a history and return the history id.
        '''
        history_dict = self.history_client.create_history(name)
        return history_dict['id']

    def create_folder(self, library_name, user_mail):
        '''
        Create a folder for the files in a library. This is used to store
        files for a Galaxy library. Return a tuple containing the library id
        and the folder id.
        '''
        # Assume that there is just one library with this name
        library = self.library_client.get_libraries(library_id=None,
                                                    name=library_name,
                                                    deleted=False)[0]
        folder = self.library_client.create_folder(library['id'], user_mail)
        return library['id'], folder[0]['id']

    def upload_workflow_input(self, workflow_input, library_id, folder_id,
                              mount_input_dir=True, input_dir=None):
        '''
        Upload the input data for a workflow to Galaxy. The files are uploaded
        from the filesystem to a folder of a Galaxy library. The files are not
        duplicated, because just symbolic links are created. If a user provides
        his own data, the files are 'uploaded' from the /input directory, which
        is just a mount point for a directory outside the container. If a user
        wants to use test data provided with the container, mount_input_dir is
        False and the directory inside the container has to be specified.
        '''
        for step_uuid, step_param in workflow_input.iteritems():
            if step_param['step_type'] == 'data_input':
                if mount_input_dir:
                    # Input data is mounted in the container
                    path = os.path.join('/input', step_param['filename'])
                else:
                    # input_dir exists inside the container (e.g. workflow test data)
                    path = os.path.join(input_dir, step_param['filename'])
                logger.info("Next upload: " + path)
                workflow_input[step_uuid]['dataset_id'] = \
                    self.library_client.upload_from_galaxy_filesystem(
                        library_id, path,
                        folder_id=folder_id,
                        file_type=step_param['galaxy_file_type'],
                        link_data_only='link_to_files')

    def export_output_history(self, history_id, output_dir):
        '''
        Export all datasets of a history to the output directory.
        '''
        # Get a list of all datasets in the output history
        history_datasets = self.history_client.show_history(history_id,
                                                            contents=True,
                                                            deleted=None,
                                                            visible=None,
                                                            details=None,
                                                            types=None)
        # Iterate over the datasets of the history and download each dataset
        # that is in 'ok' state (i.e. the tool completed successfully).
        for dataset in history_datasets:
            # Check the dataset state. Do not download input datasets!
            if dataset['state'] == 'ok':
                logger.info("Download dataset %s, state: %s",
                            dataset['name'], dataset['state'])
                self.dataset_client.download_dataset(dataset['id'],
                                                     file_path=output_dir,
                                                     use_default_filename=True,
                                                     wait_for_completion=False,
                                                     maxwait=12000)
            else:
                logger.info("Do not download dataset %s, state: %s",
                            dataset['name'], dataset['state'])
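# A minimal end-to-end sketch for GalaxyHandler, assuming a reachable Galaxy
# at the (hypothetical) URL below with a working API key; container start/stop
# and workflow execution are skipped here.
def run_export_example():
    handler = GalaxyHandler('http://localhost:8080', 'ADMIN_API_KEY')
    if not handler.initialize():
        return
    history_id = handler.create_history('example-run')
    # ... run tools/workflows into that history via handler.instance ...
    handler.export_output_history(history_id, '/output')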
#!/usr/bin/env python
import os
import shutil

import galaxy_ie_helpers
from bioblend.galaxy.histories import HistoryClient

hid = os.environ.get('DATASET_HID', None)
history_id = os.environ['HISTORY_ID']

if hid not in ('None', None):
    galaxy_ie_helpers.get(int(hid))
    shutil.copy('/import/%s' % hid, '/import/ipython_galaxy_notebook.ipynb')

additional_ids = os.environ.get("ADDITIONAL_IDS", "")
if additional_ids:
    gi = galaxy_ie_helpers.get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, contents=True)
    additional_ids = additional_ids.split(",")
    for hda in history:
        if hda["id"] in additional_ids:
            galaxy_ie_helpers.get(int(hda["hid"]))
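# Example invocation of this startup script (script name and all values are
# hypothetical; the IE launcher normally sets these variables):
#   DATASET_HID=42 HISTORY_ID=<galaxy-history-id> ADDITIONAL_IDS=<id1>,<id2> \
#       python startup.py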
def get(datasets_identifiers, identifier_type='hid', history_id=None,
        retrieve_datatype=None):
    """
    Given the history_id that is displayed to the user, this function will
    either search the history for matching files (if identifier_type is set
    to 'regex') or directly download the file[s] from the history; either way
    the files are stored under /import/.
    Return value[s] are the path[s] to the dataset[s] stored under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object version of bioblend is too slow in retrieving all datasets
    # from a history; fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_path_all = []
    datatypes_all = []

    if type(datasets_identifiers) is not list:
        datasets_identifiers = [datasets_identifiers]
    if identifier_type == "regex":
        datasets_identifiers = find_matching_history_ids(datasets_identifiers)
        identifier_type = "hid"

    for dataset_id in datasets_identifiers:
        file_path = '/import/%s' % dataset_id
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_id)
        # Cache the file requests. E.g. if someone does something silly like
        # calling get() for a Galaxy file in a for-loop, we don't want to
        # re-download every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if retrieve_datatype:
                datatypes_all.append(
                    {ds[identifier_type]: ds['extension'] for ds in history})
            if identifier_type == 'hid':
                dataset_id = int(dataset_id)
            dc.download_dataset(datasets[dataset_id], file_path=file_path,
                                use_default_filename=False)
        else:
            log.debug('Cached, not re-downloading')
            if retrieve_datatype:
                hc = HistoryClient(gi)
                history = hc.show_history(history_id, contents=True)
                datatypes_all.append(
                    {ds[identifier_type]: ds['extension'] for ds in history})
        file_path_all.append(file_path)

    # First path if only one item was given, otherwise all paths.
    # Should not break compatibility.
    if retrieve_datatype:
        if len(file_path_all) == 1:
            dataset_number = int(file_path_all[0].strip().split("/")[-1])
            return file_path_all, datatypes_all[0][dataset_number]
        else:
            datatype_multi = dict()
            for i in file_path_all:
                dataset_number = int(i.strip().split("/")[-1])
                datatype_multi[dataset_number] = datatypes_all[0][dataset_number]
            return file_path_all, datatype_multi
    else:
        return file_path_all[0] if len(file_path_all) == 1 else file_path_all
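# Hedged usage sketch for the regex form of get() above: fetch every history
# dataset whose name matches a (hypothetical) pattern and also retrieve the
# datatypes. Assumes the GIE container environment with HISTORY_ID set.
def _example_get_by_regex():
    paths, datatypes = get(['.*\\.fastq'], identifier_type='regex',
                           retrieve_datatype=True)
    print(paths)      # e.g. ['/import/3', '/import/7']
    print(datatypes)  # e.g. {3: 'fastqsanger', 7: 'fastqsanger'}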