def create_clients(self):
    '''
    Create bioblend clients for the Galaxy instance.
    '''
    # Create first client and check if the API works
    self.config_client = ConfigClient(self.instance)
    try:
        self.config_client.get_version()
        self.config_client.get_config()
    except:
        logger.error("Provided API-key does not work.")
        return False

    try:
        self.user_client = UserClient(self.instance)
        self.workflow_client = WorkflowClient(self.instance)
        self.tool_client = ToolClient(self.instance)
        self.toolshed_client = ToolShedClient(self.instance)
        self.library_client = LibraryClient(self.instance)
        self.roles_client = RolesClient(self.instance)
        self.history_client = HistoryClient(self.instance)
        self.dataset_client = DatasetClient(self.instance)
    except:
        logger.error("Error initializing other bioblend clients.")
        return False
    return True
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)

    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('_')[-1].split('.')[0]
        if replicate == '1':
            itemp = itemp + 1
            if not (itemp >= 71 and itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            files2 = libraryClient.show_library(brassica_library[0]['id'], contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = f['name'].split('_')[7] + "_" + \
                f['name'].split('_')[-3] + "_" + f['name'].split('_')[-2]
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(hist['id'], reverse_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            datamap['1'] = {'src': 'hda', 'id': dataset_R['id']}
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except:
                print('Next')
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)

    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('/')[-1][0]
        if replicate == 'X':
            base = f['name'].split('/')[-1].split('.')[0]
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)
            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            workflows = workflowsClient.get_workflows(
                name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except:
                print('Next')
def sync_galaxy_files(user):
    # go through all the GalaxyFileLink records associated with the user's Galaxy instances
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)

    # loop through Galaxy instances
    for git in gits:
        gflks = GalaxyFileLink.objects.filter(galaxyinstancetracking=git)
        gi, gu = get_gi_gu(user, git)

        # loop through the linked Galaxy files
        for gflk in gflks:
            dc = DatasetClient(gi)
            lc = LibraryClient(gi)
            if gflk.galaxy_library:
                # library dataset: check as an ldda
                mtch = dc.show_dataset(gflk.galaxy_id, hda_ldda='ldda')
                if isinstance(mtch, dict):
                    if mtch['deleted']:
                        gflk.removed = True
                else:
                    # no matching dataset returned, so mark the link as removed
                    gflk.removed = True
            else:
                # history dataset: check as an hda
                mtch = dc.show_dataset(gflk.galaxy_id, hda_ldda='hda')
                if isinstance(mtch, dict) and (mtch['deleted'] or mtch['purged']):
                    gflk.removed = True
            gflk.save()
def galaxy_isa_upload_datalib(pks, galaxy_isa_upload_param, galaxy_pass, user_id, celery_obj=''):
    # Should this be admin-only, or should all users have this ability? Not sure.

    # update celery
    if celery_obj:
        celery_obj.update_state(state='RUNNING',
                                meta={'current': 0.1, 'total': 100,
                                      'status': 'Initialising galaxy'})

    # get the Galaxy clients required for updating the Galaxy instance
    git = galaxy_isa_upload_param.galaxyinstancetracking
    gi, gu = get_gi_gu(galaxy_isa_upload_param.added_by, git)
    lc = LibraryClient(gi)

    # Retrieve or create the base library used for all ISA folders
    lib = create_library(lc, 'mogi')

    # get all associated files for the selected ISA projects
    mfiles = get_mfile_qs(pks)

    # Add the files to the Galaxy data library
    try:
        create_isa_datalib(mfiles, lib, gi, gu, galaxy_pass, galaxy_isa_upload_param,
                           user_id, celery_obj)
    except error_perm as e:
        print('ERROR CATCH', e)
        if celery_obj:
            celery_obj.update_state(state='FAILURE',
                                    meta={'current': 0.0, 'total': 100,
                                          'status': 'Failed {}'.format(e)})
        return 0
    except bioblend.ConnectionError as e:
        print('ERROR CATCH', e)
        if celery_obj:
            celery_obj.update_state(state='FAILURE',
                                    meta={'current': 0.0, 'total': 100,
                                          'status': 'Failed {}'.format(e)})
        return 0

    return 1
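# A minimal sketch of how galaxy_isa_upload_datalib() above might be driven from a bound
# Celery task, so that the celery_obj.update_state() calls report progress to the caller.
# The task name and the GalaxyISAupload model lookup are hypothetical illustrations; only
# shared_task(bind=True) and self.update_state() are standard Celery API.
from celery import shared_task


@shared_task(bind=True)
def galaxy_isa_upload_datalib_task(self, pks, param_pk, galaxy_pass, user_id):
    # look up the saved upload parameters (hypothetical model access)
    galaxy_isa_upload_param = GalaxyISAupload.objects.get(pk=param_pk)
    # pass the task itself as celery_obj so progress updates are visible to the frontend
    return galaxy_isa_upload_datalib(pks, galaxy_isa_upload_param, galaxy_pass,
                                     user_id, celery_obj=self)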
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(hist_id,
                                                              name_filter=name_filter)
        if len(countSecondary) != 0:
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            datasetClient.download_dataset(countSecondary[0]['id'],
                                           file_path=file_path,
                                           use_default_filename=False)
    sys.exit()
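# The main() above relies on module-level settings that are defined elsewhere in the
# original script. A minimal sketch of those globals; the names come from the code above,
# the values are placeholder assumptions.
GALAXY_URL = 'http://127.0.0.1:8080'   # Galaxy server to connect to
API_KEY = 'YOUR-GALAXY-API-KEY'        # user API key for that server
name_filter = 'out_file'               # dataset name to match in each history
dir_name = '/tmp/galaxy_downloads'     # local directory for downloaded files
ext = 'tabular'                        # file extension for the saved files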
def f2dl_action(gfile_ids, f2dl_param, galaxy_pass):
    # get selected files
    selected_files = GenericFile.objects.filter(pk__in=gfile_ids)
    galaxy_folder = f2dl_param.folder_name

    # get the Galaxy bioblend clients
    git = f2dl_param.galaxyinstancetracking
    gi, gu = get_gi_gu(f2dl_param.added_by, git)
    lc = LibraryClient(gi)

    # Create the base library (just returns the bioblend library object if already created)
    lib = create_library(lc, f2dl_param.added_by.username)

    # get full paths from the database
    filelist = files2paths(selected_files)
    if not filelist:
        print('filelist empty')
        return []

    # Create the folders in the Galaxy data library (can be nested if the user used
    # forward slashes)
    folders = galaxy_folder.split('/')
    folder_id = create_folders(lc, lib['id'], base_f_id=None, folders=folders)
    lib_id = lib['id']

    # upload the files to the folder
    uploaded_files = add_filelist_datalib(filelist, f2dl_param, lc, gu, gi, galaxy_pass,
                                          lib_id, folder_id, galaxy_folder)

    # link files to the Django database
    link_files_in_galaxy(uploaded_files, selected_files, git, library=True)

    # check that purged files are referenced in the database correctly
    sync_galaxy_files(f2dl_param.added_by)
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    import csv  # needed for csv.DictWriter below

    api_key = ''
    galaxy_host = 'http://localhost:8080/'

    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    history = history_client.create_history(argDictionary['accessionNumber'])

    # Import the galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')

    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')

    # Run workflow on csv data to create a new history.
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary

    datamap = {'1': {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}

    workflow_client.invoke_workflow(workflow['id'], inputs=datamap,
                                    history_id=history['id'], params=params)

    # A dirty hack: we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    dataset_id = getFoldChangeData(history, history_client)['id']

    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)

    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')

        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary

        if argDictionary['species'] == "Rat":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigStringNetwork.txt")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigGeneLengths.tabular")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")

        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)

            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']

            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}

            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)

            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # Hard code keys to define the order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable',
                'foldChange', 'interactome', 'exonLength', 'moduleNodes', 'modulePlots',
                'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'wb') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)

        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')

        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary

        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        if argDictionary['species'] == "Mouse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="mouseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="MouseGeneLengths.tab")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-mouse.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                              '3': {'id': homology, 'src': 'hda'},
                              '2': {'id': network, 'src': 'hda'},
                              '1': {'id': geneLengths, 'src': 'hda'}}
        else:
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="humanStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="geneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                              '3': {'id': homology, 'src': 'hda'},
                              '2': {'id': network, 'src': 'hda'},
                              '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)

            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']

            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}

            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)

            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # Hard code keys to define the order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA', 'chrDirTable',
                'foldChange', 'interactome', 'exonLength', 'moduleNodes', 'modulePlots',
                'pathways', 'enrichPlot', 'enrichmentTable', 'slimEnrichmentPathways',
                'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'wb') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)

        return return_collection
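# A minimal usage sketch for the runWorkflow() above (microarray variant). The keys shown
# are ones the function actually reads (accessionNumber, species); the values and the
# comparisons file path are placeholder assumptions.
argDictionary = {
    'accessionNumber': 'GSE12345',   # placeholder accession, also used to name outputs
    'species': 'Mouse',              # selects the mouse/human pathway-analysis branch
}
results = runWorkflow(argDictionary, 'comparisons.csv')
print(results)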
class GalaxyHandler:
    '''
    This class represents a Galaxy instance and provides functions to interact with that instance.
    '''

    def __init__(self, url, api_key, container_file=None, oci_bundle=False):
        self.url = url
        self.api_key = api_key
        self.container_file = container_file
        self.oci_bundle = oci_bundle
        # Bioblend GalaxyInstance
        self.instance = None
        # Bioblend Clients
        self.user_client = None
        self.config_client = None
        self.workflow_client = None
        self.tool_client = None
        self.toolshed_client = None
        self.library_client = None
        self.roles_client = None
        self.history_client = None
        self.dataset_client = None

    def start_container_galaxy(self, writable=False, binds=None):
        '''
        Run a containerized Galaxy instance.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                subprocess.call(["sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                                 "--pid-file", "/output/paster.pid", "--daemon"],
                                stdout=FNULL, stderr=subprocess.STDOUT)
            else:
                if writable:
                    subprocess.call(["sudo", "singularity", "exec", "-w", self.container_file,
                                     "sh", "/galaxy/run.sh", "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                elif binds:
                    subprocess.call(["singularity", "exec", "--bind", binds, self.container_file,
                                     "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                                     "--pid-file", "/output/paster.pid", "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                else:
                    subprocess.call(["singularity", "exec", self.container_file,
                                     "sh", "/galaxy/run.sh", "--daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)

        # Wait until the Galaxy instance is available, but do not wait longer than 1 minute
        response = None
        t = 0
        while not response:
            try:
                response = urllib.urlopen(self.url).getcode()  # returns 200 if Galaxy is up
            except:
                if t > 60:
                    logger.error("Galaxy is not up after 1 minute. Something went wrong. "
                                 "Maybe the container is corrupted. Try to open a shell in "
                                 "writable mode in the container and start Galaxy from the shell")
                    exit(1)
                else:
                    # Wait 5 s and try again
                    logger.info("Galaxy is not up ... wait 5 seconds and try again")
                    t = t + 5
                    time.sleep(5)
                    response = None
                    continue
        self.instance_running = True
        return

    def stop_container_galaxy(self, sudo=False, bind_dirs=None, tmp_dir=None):
        '''
        Stop a running containerized Galaxy instance. Remove an existing temporary directory.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                # No binds, no Singularity, just a plain run.sh --stop-daemon
                subprocess.call(["sh", "/galaxy/run.sh", "--stop-daemon"],
                                stdout=FNULL, stderr=subprocess.STDOUT)
                self.instance_running = False
                time.sleep(5)
            else:
                if sudo:
                    # We use sudo only for importing workflows, so no binds.
                    subprocess.call(["sudo", "singularity", "exec", "-w", self.container_file,
                                     "sh", "/galaxy/run.sh", "--stop-daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
                else:
                    # We use this only for workflow execution
                    subprocess.call(["singularity", "exec", "--bind", bind_dirs, self.container_file,
                                     "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                                     "--pid-file", "/output/paster.pid", "--stop-daemon"],
                                    stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)

        # Remove temporary directories
        if tmp_dir:
            logger.info("Remove temporary directory: %s", tmp_dir)
            shutil.rmtree(tmp_dir)
        return

    def create_galaxy_instance(self, check_admin=False):
        '''
        Create a bioblend GalaxyInstance. If check_admin = True, check if the user is an admin
        of the Galaxy instance; if not, return None. Returns False if an error occurs.
        '''
        # Check if the URL is valid
        if not check_url(self.url):
            logger.error("URL to galaxy instance is not a valid URL: %s", self.url)
            return False
        # Try to create a bioblend Galaxy instance
        try:
            self.instance = GalaxyInstance(url=self.url, key=self.api_key)
        except:
            logger.error("Cannot create Galaxy instance.")
            return False
        return True

    def create_clients(self):
        '''
        Create bioblend clients for the Galaxy instance.
        '''
        # Create first client and check if the API works
        self.config_client = ConfigClient(self.instance)
        try:
            self.config_client.get_version()
            self.config_client.get_config()
        except:
            logger.error("Provided API-key does not work.")
            return False
        try:
            self.user_client = UserClient(self.instance)
            self.workflow_client = WorkflowClient(self.instance)
            self.tool_client = ToolClient(self.instance)
            self.toolshed_client = ToolShedClient(self.instance)
            self.library_client = LibraryClient(self.instance)
            self.roles_client = RolesClient(self.instance)
            self.history_client = HistoryClient(self.instance)
            self.dataset_client = DatasetClient(self.instance)
        except:
            logger.error("Error initializing other bioblend clients.")
            return False
        return True

    def initialize(self):
        '''
        Initialize the bioblend GalaxyInstance and clients, and check that the API works.
        Returns False if something went wrong.
        '''
        if not self.create_galaxy_instance():
            logger.error("Cannot create bioblend GalaxyInstance for the GalaxyHandler")
            return False
        if not self.create_clients():
            logger.error("Cannot create bioblend clients for the GalaxyHandler")
            return False
        return True

    def create_user(self, name, mail, password):
        '''
        Create a new Galaxy user for a specific Galaxy instance.
        Return the user_id and an API key.
        '''
        try:
            new_user = self.user_client.create_local_user(name, mail, password)
        except ConnectionError as e:
            # User already exists
            if "already exists" in e.body:
                new_user = self.user_client.get_users(f_email=mail)[0]
        new_user_id = new_user['id']
        # Create an API key for that user
        new_user_api_key = self.user_client.create_user_apikey(new_user_id)
        return (new_user_id, new_user_api_key)

    def create_input_library(self, name, user):
        '''
        Create a dataset library for this instance.
        '''
        try:
            # Create the library
            new_library = self.library_client.create_library(name, description=None, synopsis=None)
            logger.info("new_library ok")
            # Get the role of the user
            user_role_id = self.roles_client.get_roles()[0]['id']
            logger.info("user_role_id ok")
            # Set permissions for that library.
            # The following settings enable the upload of input data by the user to this library.
            self.library_client.set_library_permissions(
                library_id=new_library['id'], access_in=user_role_id, modify_in=user_role_id,
                add_in=user_role_id, manage_in=user_role_id)
            return True
        except:
            logger.error("Cannot create Galaxy data library")
            return False

    def create_history(self, name):
        '''
        Create a history and return the history id.
        '''
        history_dict = self.history_client.create_history(name)
        return history_dict['id']

    def create_folder(self, library_name, user_mail):
        '''
        Create a folder for the files in a library. This is used to store files for a Galaxy
        library. Return a tuple containing the library id and the folder id.
        '''
        # Assume that there is just one library with this name
        library = self.library_client.get_libraries(library_id=None, name=library_name, deleted=False)[0]
        folder = self.library_client.create_folder(library['id'], user_mail)
        return library['id'], folder[0]['id']

    def upload_workflow_input(self, workflow_input, library_id, folder_id, mount_input_dir=True, input_dir=None):
        '''
        Upload the input data for a workflow to Galaxy. The files are uploaded from the
        filesystem to a folder of a Galaxy library. The files are not duplicated, because
        just symbolic links will be created. If a user provides their own data, the files are
        'uploaded' from the /input directory, which is just a mount point for a directory
        outside the container. If a user wants to use test data provided with the container,
        mount_input_dir is False and the directory inside the container has to be specified.
        '''
        for step_uuid, step_param in workflow_input.items():
            if step_param['step_type'] == 'data_input':
                if mount_input_dir:
                    # Input data is mounted in the container
                    path = os.path.join('/input', step_param['filename'])
                else:
                    # input_dir exists inside the container (e.g. workflow test data)
                    path = os.path.join(input_dir, step_param['filename'])
                logger.info("Next upload: " + path)
                workflow_input[step_uuid]['dataset_id'] = self.library_client.upload_from_galaxy_filesystem(
                    library_id, path, folder_id=folder_id,
                    file_type=step_param['galaxy_file_type'], link_data_only='link_to_files')

    def export_output_history(self, history_id, output_dir):
        '''
        Export all datasets of a history to the output directory.
        '''
        # Get a list of all datasets in the output history
        history_datasets = self.history_client.show_history(history_id, contents=True, deleted=None,
                                                            visible=None, details=None, types=None)
        # Iterate over the datasets of the history and download each dataset that has 'ok'
        # state (i.e. the tool completed)
        for dataset in history_datasets:
            # Check the dataset status, e.g. if the corresponding task completed.
            # Do not download input datasets!
            if dataset['state'] == 'ok':
                logger.info("Download dataset %s, state: %s", dataset['name'], dataset['state'])
                self.dataset_client.download_dataset(dataset['id'], file_path=output_dir,
                                                     use_default_filename=True,
                                                     wait_for_completion=False, maxwait=12000)
            else:
                logger.info("Do not download dataset %s, state: %s", dataset['name'], dataset['state'])
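# A minimal sketch of how the GalaxyHandler class above might be used once a Galaxy
# instance is reachable. The URL, API key and history name are placeholder assumptions;
# the methods called (initialize, create_history, export_output_history) are the ones
# defined above.
handler = GalaxyHandler('http://127.0.0.1:8080', 'YOUR-GALAXY-API-KEY')
if handler.initialize():
    history_id = handler.create_history('workflow-run')
    # ... invoke a workflow into this history, then export the results ...
    handler.export_output_history(history_id, '/output')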
def create_isa_datalib(mfiles, lib, gi, gu, galaxy_pass, galaxy_isa_upload_param, user_id, celery_obj=''):
    name_map = get_namemap()

    igrps = group_by_keys(mfiles, (name_map['investigation'],))
    file_count = 0

    lc = LibraryClient(gi)
    fc = FoldersClient(gi)

    for igrp in igrps:
        # get the investigation name of the group, and create folder
        ifolder, sgrps = create_investigation_folder(igrp, lc, fc, lib, galaxy_isa_upload_param, name_map)

        # create samplelist (and create relevant misa file)
        samplelist_misafile_id = create_samplelist(user_id, igrp)

        # Upload all generic MISA files (including the one we just created above)
        investigation_n = igrp[0][name_map['investigation']]
        misa_files = MISAFile.objects.filter(investigation__name=investigation_n)

        for misafile in misa_files:
            save_to_galaxy([misafile], galaxy_isa_upload_param, lc, gu, gi, galaxy_pass,
                           lib['id'], ifolder['id'], misafile.original_filename, True)

        for sgrp in sgrps:
            # get the study name of the group and create folder
            sfolder, agrps = create_study_folder(sgrp, lc, lib, name_map, ifolder)

            for agrp in agrps:
                study_n = agrp[0][name_map['study']]
                investigation_n = agrp[0][name_map['investigation']]
                assay_n = agrp[0][name_map['assay']]
                full_assay_name = '{}_{}_{}'.format(study_n, investigation_n, assay_n)

                if celery_obj:
                    if file_count == 0:
                        count = 0.1
                    else:
                        count = file_count
                    celery_obj.update_state(state='RUNNING',
                                            meta={'current': count,
                                                  'total': len(mfiles) + 1,
                                                  'status': 'Assay: {}'.format(assay_n)})

                afolder = create_assay_folder(agrp, lc, lib, name_map, sfolder)

                data_lib_files = save_to_galaxy(agrp, galaxy_isa_upload_param, lc, gu, gi,
                                                galaxy_pass, lib['id'], afolder['id'],
                                                full_assay_name)
                # file_count += len(data_lib_files)
        remote_dataset=True, folder_id=folder_id, uuid_list=uuid_string,
        remote_dataset_type_list=remote_dataset_type_string,
        file_size_list=file_size_string, line_count_list=line_count_string)


if __name__ == "__main__":
    if (len(sys.argv) < 3):
        sys.stderr.write('Needs 2 arguments : <Galaxy_library_name> <csv_file>\n')
        sys.exit(-1)

    # Connect to Galaxy
    gi = GalaxyInstance(url=galaxy_key.galaxy_host, key=galaxy_key.galaxy_key)
    li = LibraryClient(gi)

    library_name = sys.argv[1]
    csv_file = sys.argv[2]
    try:
        fptr = open(csv_file, 'rb')
    except:
        sys.stderr.write('Could not open CSV file\n')
        sys.exit(-1)

    upload_file_info_list = []
    #try:
        #dialect,id_field_name,fieldnames = import_datasets_by_uuid.check_and_return_header(fptr, delimiter=',');
        #csv_reader = csv.DictReader(fptr, fieldnames=fieldnames, dialect=dialect);
        #for row in csv_reader:
            #info = UploadFileInfo();
def main():
    parser = OptionParser()
    parser.add_option("-A", "--auth-file", dest="auth_filename",
                      help="JSON file with Galaxy host and key", metavar="FILE")
    parser.add_option("-f", "--uuid-file", dest="uuids_filename",
                      help="TSV file with list of UUIDs to import. The first row is assumed to be a header",
                      metavar="FILE")
    parser.add_option("-H", "--target-history", dest="target_history",
                      help="Target history name in Galaxy to copy datasets into",
                      metavar="HISTORY_NAME")
    (options, args) = parser.parse_args()

    if (not options.auth_filename):
        print_error_and_exit('Authentication file not provided')
    #if(not options.uuids_filename):
        #print_error_and_exit('TSV file with UUIDs not provided');
    if (not options.target_history):
        print_error_and_exit('Galaxy history name where datasets will be imported not provided')

    # Read authentication info
    galaxy_host, galaxy_key = parse_auth_file(options.auth_filename)

    gi = GalaxyInstance(url=galaxy_host, key=galaxy_key)
    history_client = HistoryClient(gi)
    library_client = LibraryClient(gi)
    folder_client = FoldersClient(gi)

    # Read UUIDs file
    if (options.uuids_filename):
        try:
            uuids_fd = open(options.uuids_filename, 'rb')
        except IOError:
            print_error_and_exit('Could not open TSV file with UUIDs ' + options.uuids_filename)
    else:
        uuids_fd = sys.stdin

    queried_ds_uuid_dict = parse_TSV_file(uuids_fd)

    # Search for datasets
    find_datasets_by_uuids_in_histories(gi, history_client, queried_ds_uuid_dict)
    find_datasets_by_uuids_in_libraries(gi, library_client, queried_ds_uuid_dict)
    dataset_info_list = queried_ds_uuid_dict.values()

    # Validate datasets, discard repeats
    validate_queried_dataset_info(dataset_info_list)

    # Get/create target history
    target_history_id = get_or_create_history_id(gi, history_client, options.target_history)

    # Copy datasets from library to history
    copy_from_lib(gi, history_client, dataset_info_list, target_history_id=target_history_id)

    # Copy from history to /tmp and back - don't use anymore
    #copy_to_tmp_lib_and_back(gi, library_client, history_client, folder_client, '/tmp', dataset_info_list, target_history_id=target_history_id);

    # Copy history datasets from other histories
    copy_other_history_datasets(gi, history_client, dataset_info_list, target_history_id=target_history_id)

    # Create dataset collections
    create_dataset_collections(gi, history_client, dataset_info_list, target_history_id=target_history_id)
parser = SafeConfigParser()
if len(sys.argv) >= 2:
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print("You passed %s, I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')

galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
libraryInstance = LibraryClient(galaxyInstance)

libs = libraryInstance.get_libraries(name=parser.get('Globals', 'default_lib'))
details = libraryInstance.get_folders(library_id=libs[0]['id'])
folder = libraryInstance.show_library(library_id=libs[0]['id'], contents=True)
for f in folder[1:]:
    print("%s:%s" % (f['name'], f['id']))

workflow = WorkflowClient(galaxyInstance)
wf = workflow.get_workflows()
for w in wf:
    print(w['id'])
    print(w['name'])
    print(w['owner'])
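# The script above expects a configuration.ini with a [Globals] section providing
# api_file, galaxy_host and default_lib. A minimal sketch that writes such a file with
# configparser; the values are placeholder assumptions.
from configparser import ConfigParser

config = ConfigParser()
config['Globals'] = {
    'api_file': '/home/user/.galaxy_api_key',   # file read by get_api_key()
    'galaxy_host': 'http://127.0.0.1:8080',     # Galaxy server URL
    'default_lib': 'My Data Library',           # data library looked up by name
}
with open('configuration.ini', 'w') as fh:
    config.write(fh)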
def runWorkflow(argDictionary, comparisons, samples):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    import time
    import csv  # needed for csv.DictReader/DictWriter below

    api_key = ''
    galaxy_host = ''

    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    history = history_client.create_history(argDictionary['accessionNumber'])
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')

    if argDictionary['site'] == "ENA":
        # fastqs available on ENA
        tool_inputs = {
            "accessionNumber": argDictionary["ENA"],
            "sampleTable": {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
        }
        # run the tool to get the data from ENA
        tool_client.run_tool(history['id'], 'getRNASeqExpressionData', tool_inputs)

        # we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)

        # sleep until all the fastq files are findable
        time.sleep(120)

        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history, history_client)[0]
        fileList = history_client.download_dataset(history["id"], fileList["id"], dirpath)
        num_lines = sum(1 for line in open(fileList)) - 1

        datasets = list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else:
        # for SRA
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print(sample)
                    fileNames = str.split(sample["File"], "|")
                    for fileName in fileNames:
                        tool_inputs = {
                            "input|input_select": "accession_number",
                            "outputformat": "fastqsanger.gz",
                            "input|accession": fileName
                        }
                        # run the tool to get the single-end data from SRA
                        tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3', tool_inputs)
        else:
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    tool_inputs = {
                        "accession_number": sample["File"]
                    }
                    # run the tool to get the paired-end data from SRA
                    tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4', tool_inputs)

        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)

        datasets = getDatasetsByApproxName("fastq", history, history_client)

    # run FastQC on the fastq files
    for fastq in datasets:
        try:
            tool_inputs = {'input_file': {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69', tool_inputs)
        except Exception:
            pass

    # wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # make dataset collections for quantification using the fastq files
    collections = list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets = list()
            fileNames = str.split(row["File"], "|")
            for fileName in fileNames:
                datasets = datasets + getDatasetsByApproxName(fileName, history, history_client)
            # make a collection of datasets for this sample
            collections.append(makeDataSetCollection(datasets, row["Sample"], history, history_client))

    # get the correct kallisto index
    species = argDictionary['species'].lower()
    index = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name=species + "IndexFile")
    index = {'id': index, 'src': 'hda'}

    # run kallisto for every dataset collection
    for collection in collections:
        # set up the tool_inputs
        tool_inputs = {'index': index,
                       'inputs': {'id': collection['id'], 'src': 'hdca'},
                       "single": argDictionary["single"],
                       "stranded": argDictionary["stranded"]}
        # we often encounter a "connection broken" error here (possibly a problem with the
        # Certus server?), which is bypassed by ignoring the exception
        tool_client.run_tool(history['id'], 'kallistoQuant', tool_inputs)

    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # Run multiqc on kallisto logs and fastqc files
    datasets = getDatasetsByApproxName("RawData", history, history_client)
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)

    tool_inputs = {}
    for i, dataset in enumerate(datasets + kallistoLogs):
        if not dataset["deleted"]:
            if dataset in datasets:
                software = 'fastqc'
            else:
                software = 'kallisto'
            params = {'id': dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software,
                                'results_%s|input_file' % i: params})

    # summarise with the multiQC tool
    tool_client.run_tool(history['id'], 'multiqc', tool_inputs)
    multiQc = getDatasetsByApproxName("multiqc", history, history_client)[0]

    # get all the abundance files to convert to a gene-level counts matrix
    datasets = getDatasetsByApproxName(".abundance", history, history_client)

    # make a dataset collection from which to build a counts matrix
    collection = makeDataSetCollection(datasets, "abundances", history, history_client)

    # set up the tool_inputs
    tool_inputs = {'inputs': {'id': collection['id'], 'src': 'hdca'},
                   "species": argDictionary['species']}

    # convert abundances to a gene-level counts matrix
    tool_client.run_tool(history['id'], 'KallistoAbundancestoGeneCountMatrix', tool_inputs)

    # A dirty hack: we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    txi = getDatasetsByApproxName("txi", history, history_client)

    # set up the tool_inputs for PCA
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    # run PCA
    tool_client.run_tool(history['id'], 'PCARNASeq', tool_inputs)
    pca = getDatasetsByApproxName("PCA", history, history_client)[0]

    # set up the tool_inputs for DESeq2
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   'comparisonsTable': {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'},
                   "foldChangeOnly": argDictionary['foldChangeOnly'],
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    # run DESeq2
    tool_client.run_tool(history['id'], 'DESeq2FoldChange', tool_inputs)
    # run chrdir
    tool_client.run_tool(history['id'], 'characteristicDirectionRNASeq', tool_inputs)

    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # get the fold-change data, cut it and run the pathway workflow
    dataset_id = getFoldChangeData(history, history_client)['id']

    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(pca["id"]),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)

    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')

        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary

        if argDictionary['species'] == "Rat":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="ratGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Cow":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="cowGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Horse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="horseGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigStringNetwork.txt")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="pigGeneLengths.tabular")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="zebrafishGeneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="HOM_AllOrganism.rpt")

        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)

            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']

            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}

            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)

            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # Hard code keys to define the order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable', 'comparisonNum',
                'comparisonDenom', 'foldChange', 'interactome', 'exonLength', 'moduleNodes',
                'modulePlots', 'slimEnrichPathways', 'secretedProteins', 'enrichedDrugsReverse',
                'enrichedDrugsMimic', 'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'wb') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)

        #tool_client.upload_file(outFileName, history['id'], file_type='tsv')

        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')

        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary

        if argDictionary['species'] == "Mouse":
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="mouseStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="MouseGeneLengths.tab")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-mouse.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                              '3': {'id': homology, 'src': 'hda'},
                              '2': {'id': network, 'src': 'hda'},
                              '1': {'id': geneLengths, 'src': 'hda'}}
        else:
            network = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="humanStringNetwork")
            geneLengths = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="geneLengths")
            homology = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="Homology.mouse.txt")
            secretedReference = getLibraryToolDataID(history=history, history_client=history_client, library_client=library_client, name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                              '3': {'id': homology, 'src': 'hda'},
                              '2': {'id': network, 'src': 'hda'},
                              '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            #pathwayDatamap['1'] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)

            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']

            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}

            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)

            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # Hard code keys to define the order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable', 'comparisonNum',
                'comparisonDenom', 'foldChange', 'interactome', 'exonLength', 'moduleNodes',
                'modulePlots', 'pathways', 'enrichPlot', 'enrichmentTable', 'slimEnrichPathways',
                'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic', 'enrichedTerms',
                'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'wb') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)

        return return_collection
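# A minimal usage sketch for the RNA-seq runWorkflow() above. The dictionary keys are the
# ones the function reads (accessionNumber, site, ENA, single, stranded, species,
# technicalReplicates, batchCorrect, foldChangeOnly); all values and file paths are
# placeholder assumptions.
argDictionary = {
    'accessionNumber': 'E-MTAB-0000',
    'site': 'ENA',                 # 'ENA' downloads fastqs via getRNASeqExpressionData
    'ENA': 'PRJEB00000',
    'single': 'FALSE',             # paired-end data
    'stranded': 'FALSE',
    'species': 'Human',
    'technicalReplicates': 'FALSE',
    'batchCorrect': 'FALSE',
    'foldChangeOnly': 'FALSE',
}
results = runWorkflow(argDictionary, 'comparisons.txt', 'samples.tsv')
print(results)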
parser.add_argument("-a", "--apikey") parser.add_argument("-e", "--endpoint") parser.add_argument("-p", "--port") parser.add_argument("-s", "--sourcedir") args = parser.parse_args() host = "127.0.0.1" if not args.endpoint else args.endpoint port = "8080" addr = host + ":{}".format(port) if port else "" apik = args.apikey gi = GalaxyInstance(addr, apik) lc = LibraryClient(gi) fc = FoldersClient(gi) hc = HistoryClient(gi) library_name = "GDC Files" library_description = "A library of files acquired from the NCI Genomic Data Commons (GDC)" libs=lc.get_libraries() lib = {} if libs and isinstance(libs, dict): libs = [libs] if libs: for _lib in libs: if "name" in _lib and _lib["name"] == library_name: lib = _lib else:
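# The snippet above is cut off at the trailing "else:". A plausible completion, as a
# hedged sketch only: when get_libraries() returns nothing (and, below, when no library
# matched by name), create the data library with the name and description defined above.
# LibraryClient.create_library() is a real bioblend call; the exact original logic here
# is an assumption.
    lib = lc.create_library(library_name, description=library_description)
if not lib:
    # assumption: also create the library if none of the existing ones matched by name
    lib = lc.create_library(library_name, description=library_description)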