# Imports needed by this script; GALAXY_HOME must point at the Galaxy root.
import argparse
from os import environ, listdir
from os.path import isfile, join

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.workflows import WorkflowClient


def main():  # was misnamed __main__(), which is never called automatically
    parser = argparse.ArgumentParser(description='Import workflows from a local directory')
    parser.add_argument('-p', '--port',
                        help='port number of your Docker container mapped to the host machine (default: 80)',
                        default='80')
    parser.add_argument('-k', '--key', help='user API key')
    args = parser.parse_args()

    # Galaxy client instance
    galaxy_home = environ['GALAXY_HOME']
    galaxy_client = GalaxyInstance(url='http://127.0.0.1:' + args.port, key=args.key)

    # Workflow client instance: import every workflow file found in my_workflows
    workflow_client = WorkflowClient(galaxy_client)
    my_workflows_dir = galaxy_home + '/my_workflows'
    for f in listdir(my_workflows_dir):
        f_path = join(my_workflows_dir, f)
        if isfile(f_path):
            workflow_client.import_workflow_from_local_path(f_path)
            print('Imported workflow: ' + f)
def main():
    # GALAXY_URL, API_KEY, WORKFLOW_ID and TOOL_ID_IN_GALAXY are module-level
    # constants; downloadDataset and findDatasedIdByExtention are helpers
    # defined elsewhere in this script.
    try:
        input_path = sys.argv[1]
        output_path = sys.argv[2]
    except IndexError:
        print('usage: %s input_path output_path' % os.path.basename(sys.argv[0]))
        sys.exit(1)

    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    historyClient = HistoryClient(galaxyInstance)
    toolClient = ToolClient(galaxyInstance)
    workflowClient = WorkflowClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    history = historyClient.create_history('tmp')
    uploadedFile = toolClient.upload_file(input_path, history['id'])
    workflow = workflowClient.show_workflow(WORKFLOW_ID)
    # next(iter(...)) instead of .keys()[0], which fails on Python 3
    dataset_map = {next(iter(workflow['inputs'])):
                   {'id': uploadedFile['outputs'][0]['id'], 'src': 'hda'}}
    params = {TOOL_ID_IN_GALAXY: {'param': 'reference_genome', 'value': 'hg19'}}
    # Note: run_workflow is deprecated in newer bioblend releases in favour of
    # invoke_workflow.
    output = workflowClient.run_workflow(WORKFLOW_ID, dataset_map, params, history['id'])
    downloadDataset(datasetClient,
                    findDatasedIdByExtention(datasetClient, output, 'bed'),
                    output_path)
    # delete the temporary history
    historyClient.delete_history(history['id'])
    # if the Galaxy instance supports dataset purging:
    # historyClient.delete_history(history['id'], True)
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('_')[-1].split('.')[0]
        if replicate == '1':
            itemp += 1
            # only process library items 71 through 92 in this batch
            if not (71 <= itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            # find the matching reverse read in the same library
            files2 = libraryClient.show_library(brassica_library[0]['id'], contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = (f['name'].split('_')[7] + '_' +
                                f['name'].split('_')[-3] + '_' +
                                f['name'].split('_')[-2])
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(hist['id'], reverse_id)
            datamap = {
                '0': {'src': 'hda', 'id': dataset_F['id']},
                '1': {'src': 'hda', 'id': dataset_R['id']},
            }
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
def check_galaxy(api_key, galaxy_url):
    gi = GalaxyInstance(galaxy_url, key=api_key)
    gi.verify = False  # skip SSL certificate verification
    wc = WorkflowClient(gi)
    try:
        wc.get_workflows()
    except ConnectionError:
        raise forms.ValidationError(
            'Something is wrong with Galaxy connection, please check')
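# A plausible (assumed) way to wire check_galaxy() into a Django form, since it
# raises forms.ValidationError; the form and field names here are hypothetical.
from django import forms

class GalaxyConnectionForm(forms.Form):
    galaxy_url = forms.CharField()
    api_key = forms.CharField()

    def clean(self):
        cleaned = super().clean()
        check_galaxy(cleaned.get('api_key'), cleaned.get('galaxy_url'))
        return cleaned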
def main():
    # wf_dname, repo_name, makeDir and get_all_ga are defined elsewhere in this script.
    try:
        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> get current workflow list...')
        gInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        wClient = WorkflowClient(gInstance)
        dataset = wClient.get_workflows()
        wf_namelist = [x['name'] for x in dataset if not x['deleted']]
        print(wf_namelist)

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> clone BiT Workflows from GitHub...')
        if not os.path.exists(wf_dname + '/' + repo_name):
            makeDir(wf_dname)
            os.chdir(wf_dname)
            git_url = 'https://github.com/myoshimura080822/' + repo_name + '.git'
            Repo.clone_from(git_url, repo_name)
        else:
            print(repo_name + ' already cloned. To update, please delete, move or '
                  'rename the dir before executing this script.')
            return 0

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> delete and import workflow files...')
        mytoolsdir = wf_dname + '/' + repo_name + '/'
        clone_wf_list = [f.replace(mytoolsdir, '') for f in get_all_ga(mytoolsdir)]
        print(clone_wf_list)

        # names of existing workflows that match the cloned .ga files
        delete_itm = [y for x in clone_wf_list
                      for y in wf_namelist if y.find(x.replace('.ga', '')) > -1]
        print(delete_itm)
        id_list = [x['id'] for y in delete_itm
                   for x in dataset if x['name'].find(y) > -1]
        print(id_list)
        for wf_id in id_list:
            wClient.delete_workflow(wf_id)
        print(wClient.get_workflows())

        for wf_file in get_all_ga(mytoolsdir):
            wClient.import_workflow_from_local_path(wf_file)
        print(wClient.get_workflows())

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> script ended :)')
        return 0
    except Exception:
        info = sys.exc_info()
        tbinfo = traceback.format_tb(info[2])
        print('Error Info...'.ljust(80, '='))
        for tbi in tbinfo:
            print(tbi)
        print('  %s' % str(info[1]))
        print('\n'.rjust(85, '='))
        sys.exit(1)
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('/')[-1][0]
        if replicate == 'X':
            base = f['name'].split('/')[-1].split('.')[0]
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)
            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            datamap = {'0': {'src': 'hda', 'id': dataset_F['id']}}
            workflows = workflowsClient.get_workflows(name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
def create_clients(self):
    '''Create bioblend clients for the Galaxy instance.'''
    # Create the first client and check that the API key works
    self.config_client = ConfigClient(self.instance)
    try:
        self.config_client.get_version()
        self.config_client.get_config()
    except Exception:
        logger.error("Provided API key does not work.")
        return False
    try:
        self.user_client = UserClient(self.instance)
        self.workflow_client = WorkflowClient(self.instance)
        self.tool_client = ToolClient(self.instance)
        self.toolshed_client = ToolShedClient(self.instance)
        self.library_client = LibraryClient(self.instance)
        self.roles_client = RolesClient(self.instance)
        self.history_client = HistoryClient(self.instance)
        self.dataset_client = DatasetClient(self.instance)
    except Exception:
        logger.error("Error initializing other bioblend clients.")
        return False
    return True
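# A minimal usage sketch for create_clients() above (names assumed, not from the
# source): the method expects an object that already holds a GalaxyInstance in
# self.instance.
class GalaxyWrapper:
    def __init__(self, url, api_key):
        self.instance = GalaxyInstance(url, key=api_key)

    create_clients = create_clients  # bind the function above as a method

# wrapper = GalaxyWrapper('https://usegalaxy.org', 'my-api-key')
# if wrapper.create_clients():
#     print(wrapper.config_client.get_version())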
def workflow_sync(user):
    # get all Galaxy instances for that user
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    dj_wfs = Workflow.objects.all()

    # loop through instances
    all_wfs = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        wfs = wc.get_workflows()
        all_wfs.extend(wfs)
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            wjson = wc.export_workflow_dict(wf['id'])
            # check if the workflow id is already tracked in Django
            dj_wf = dj_wfs.filter(galaxy_id=wfd['id'])
            if dj_wf:
                if not dj_wf[0].latest_workflow_uuid == wf['latest_workflow_uuid']:
                    dj_wf_update = dj_wf[0]
                    dj_wf_update.latest_workflow_uuid = wf['latest_workflow_uuid']
                    dj_wf_update.name = wf['name']
                    dj_wf_update.workflowjson = wjson
                    dj_wf_update.save()
            else:
                workflow = Workflow(galaxy_id=wf['id'],
                                    name=wf['name'],
                                    description='added automatically',
                                    galaxyinstancetracking=git,
                                    added_by=user,
                                    workflowjson=wjson)
                workflow.save()

    # flag workflows that are no longer accessible on any instance
    all_wfs_gi = [w['id'] for w in all_wfs]
    Workflow.objects.exclude(galaxy_id__in=all_wfs_gi).update(accessible=False)
def run_galaxy_workflow(wid, user, git, pkd, l, history_name, library):
    gi, gu = get_gi_gu(user, git)
    st = get_time_stamp()
    workflow_input_d = get_workflow_inputs(l, pkd, gi, git, history_name, library)
    wc = WorkflowClient(gi)
    print(workflow_input_d)
    workflow = Workflow.objects.get(id=wid)
    wf = wc.get_workflows(workflow_id=workflow.galaxy_id)[0]
    wfi = wc.invoke_workflow(wf['id'],
                             inputs=workflow_input_d,
                             import_inputs_to_history=True,
                             history_name='{}_({})'.format(history_name, st))
    return wfi
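# A hypothetical call to run_galaxy_workflow() above; the model ids and
# parameter dict are placeholders, not values from the source.
# wfi = run_galaxy_workflow(wid=1, user=request.user, git=git_instance,
#                           pkd=param_dict, l=input_files,
#                           history_name='rnaseq_run', library=False)
# print(wfi['id'])  # the returned dict describes the new invocation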
def get_galaxy_workflow_inputs(w, user):
    wf = w.workflowfile
    git = w.galaxyinstancetracking
    wf_name = w.name
    api_key = GalaxyUser.objects.get(internal_user=user,
                                     galaxyinstancetracking=git).api_key
    galaxy_url = git.url
    gi = GalaxyInstance(galaxy_url, key=api_key)
    gi.verify = False
    wc = WorkflowClient(gi)

    # tag the uploaded copy of the workflow with the upload date
    wfd = wf.read()
    jsonload = json.loads(wfd)
    now = datetime.datetime.now()
    jsonload['name'] = '{} dj-upload[{} {}]'.format(jsonload['name'], wf_name,
                                                    now.strftime("%Y-%m-%d"))
    wfimp = wc.import_workflow_json(jsonload)
    return check_workflow_data_inputs(wfimp['id'], wc), wfimp['id']
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    # download the first dataset matching name_filter from every history
    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(hist_id,
                                                              name_filter=name_filter)
        if len(countSecondary) != 0:
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            datasetClient.download_dataset(countSecondary[0]['id'],
                                           file_path=file_path,
                                           use_default_filename=False)
    sys.exit()
def get_workflow_status(user):
    # go through every Galaxy instance for this user
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    status = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        hc = HistoryClient(gi)
        wfs = wc.get_workflows()
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            winvoke = wc.get_invocations(wf['id'])
            for wi in winvoke:
                wid = wc.show_invocation(wf['id'], wi['id'])
                h_l = hc.get_histories(wid['history_id'], deleted=True)
                if h_l:
                    h = h_l[0]
                else:
                    continue
                sd = get_status_d(wid)
                sd['name'] = wfd['name']
                hd = hc.show_history(h['id'])
                sd['history_name'] = h['name']
                datetime_object = datetime.strptime(hd['update_time'],
                                                    '%Y-%m-%dT%H:%M:%S.%f')
                sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                sd['update_time_unix'] = unixtime(datetime_object)
                sd['galaxy_instance'] = git.name
                status.append(sd)
    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
def main():
    try:
        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> get current workflow list...')
        gInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        wClient = WorkflowClient(gInstance)
        dataset = wClient.get_workflows()
        wf_namelist = [x['name'] for x in dataset if not x['deleted']]
        print(wf_namelist)

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> clone BiT Workflows from GitHub...')
        if not os.path.exists(wf_dname + '/galaxy-workflow_rnaseq'):
            makeDir(wf_dname)
            os.chdir(wf_dname)
            git_url = 'https://github.com/myoshimura080822/galaxy-workflow_rnaseq.git'
            Repo.clone_from(git_url, 'galaxy-workflow_rnaseq')
        else:
            print('BiT Workflow already cloned. To update, please delete, move or '
                  'rename "/galaxy-workflow_rnaseq" before executing this script.')
            return 0

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> delete and import workflow files...')
        mytoolsdir = wf_dname + '/galaxy-workflow_rnaseq/'
        clone_wf_list = [f.replace(mytoolsdir, '') for f in get_all_ga(mytoolsdir)]
        print(clone_wf_list)

        # names of existing workflows that match the cloned .ga files
        delete_itm = [y for x in clone_wf_list
                      for y in wf_namelist if y.find(x.replace('.ga', '')) > -1]
        print(delete_itm)
        id_list = [x['id'] for y in delete_itm
                   for x in dataset if x['name'].find(y) > -1]
        print(id_list)
        for wf_id in id_list:
            wClient.delete_workflow(wf_id)
        print(wClient.get_workflows())

        for wf_file in get_all_ga(mytoolsdir):
            wClient.import_workflow_from_local_path(wf_file)
        print(wClient.get_workflows())

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> script ended :)')
        return 0
    except Exception:
        info = sys.exc_info()
        tbinfo = traceback.format_tb(info[2])
        print('Error Info...'.ljust(80, '='))
        for tbi in tbinfo:
            print(tbi)
        print('  %s' % str(info[1]))
        print('\n'.rjust(85, '='))
        sys.exit(1)
else: print "You passed %s I need a .ini file" %(sys.argv[1],) sys.exit(1) else: parser.read('configuration.ini') api_key = get_api_key(parser.get('Globals', 'api_file')) galaxy_host = parser.get('Globals', 'galaxy_host') file_name_re = re.compile(parser.get('Globals', 'sample_re')) galaxyInstance = GalaxyInstance(galaxy_host, key=api_key) historyClient = HistoryClient(galaxyInstance) toolClient = ToolClient(galaxyInstance) workflowClient = WorkflowClient(galaxyInstance) dataSetClient = DatasetClient(galaxyInstance) files = get_files(parser.get('Globals','fastq_dir')) if len(files) == 0: print "Not able to find any fastq files looked in %s" %(parser.get('Globals', 'fastq_dir')) else: print "Found fastq files running workflow for the following files (R2's will be added)" print ",".join(files) files_to_keep = {} for R1 in files: input_dir_path = os.path.dirname(R1)+"/" R2 = R1.replace('R1','R2') if not os.path.exists(R1): print "%s File Not Found" % (R1, ) raise Exception
parser = SafeConfigParser()
if len(sys.argv) >= 2:
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print("You passed %s I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')

galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
libraryInstance = LibraryClient(galaxyInstance)

libs = libraryInstance.get_libraries(name=parser.get('Globals', 'default_lib'))
details = libraryInstance.get_folders(library_id=libs[0]['id'])
folder = libraryInstance.show_library(library_id=libs[0]['id'], contents=True)
for f in folder[1:]:
    print("%s:%s" % (f['name'], f['id']))

workflow = WorkflowClient(galaxyInstance)
wf = workflow.get_workflows()
for w in wf:
    print(w['id'])
    print(w['name'])
    print(w['owner'])
def runWorkflow(argDictionary, comparisons, samples):
    # Helper functions such as getNumberNotComplete, getNumberComplete,
    # getDatasetsByApproxName, getDatasetsByName, getMostRecentDatasetByName,
    # makeDataSetCollection, getFoldChangeData, getRowFromCsv, getUrl and
    # getLibraryToolDataID are defined elsewhere in this module.
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    import time
    import csv

    api_key = ''      # fill in
    galaxy_host = ''  # fill in
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    def lib_data(name):
        # shorthand for the repeated library-data lookups below
        return getLibraryToolDataID(history=history, history_client=history_client,
                                    library_client=library_client, name=name)

    history = history_client.create_history(argDictionary['accessionNumber'])
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')

    if argDictionary['site'] == "ENA":
        # fastqs available on ENA
        tool_inputs = {
            "accessionNumber": argDictionary["ENA"],
            "sampleTable": {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
        }
        # run the tool to get the data from ENA
        tool_client.run_tool(history['id'], 'getRNASeqExpressionData', tool_inputs)
        # we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
        # sleep until all the fastq files are findable
        time.sleep(120)
        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history, history_client)[0]
        fileList = history_client.download_dataset(history["id"], fileList["id"], dirpath)
        num_lines = sum(1 for line in open(fileList)) - 1
        datasets = list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else:
        # for SRA
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print(sample)
                    fileNames = str.split(sample["File"], "|")
                    for fileName in fileNames:
                        tool_inputs = {
                            "input|input_select": "accession_number",
                            "outputformat": "fastqsanger.gz",
                            "input|accession": fileName
                        }
                        # run the tool to get the single-end data from SRA
                        tool_client.run_tool(history['id'],
                                             'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3',
                                             tool_inputs)
        else:
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    tool_inputs = {"accession_number": sample["File"]}
                    # run the tool to get the paired-end data from SRA
                    tool_client.run_tool(history['id'],
                                         'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4',
                                         tool_inputs)
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
        datasets = getDatasetsByApproxName("fastq", history, history_client)

    # run FastQC on each fastq dataset
    for fastq in datasets:
        try:
            tool_inputs = {'input_file': {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'],
                                 'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69',
                                 tool_inputs)
        except Exception:
            pass
    # wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # make dataset collections for quantification using the fastq files
    collections = list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets = list()
            fileNames = str.split(row["File"], "|")
            for fileName in fileNames:
                datasets = datasets + getDatasetsByApproxName(fileName, history,
                                                              history_client)
            # make a list of datasets per sample
            collections.append(makeDataSetCollection(datasets, row["Sample"],
                                                     history, history_client))

    # get the correct kallisto index
    species = argDictionary['species'].lower()
    index = lib_data(species + "IndexFile")
    index = {'id': index, 'src': 'hda'}

    # run kallisto for every dataset collection
    for collection in collections:
        tool_inputs = {'index': index,
                       'inputs': {'id': collection['id'], 'src': 'hdca'},
                       "single": argDictionary["single"],
                       "stranded": argDictionary["stranded"]}
        # often encounter a "connection broken" error - possible problem with
        # the server? bypass by ignoring the exception
        tool_client.run_tool(history['id'], 'kallistoQuant', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # run MultiQC on kallisto logs and FastQC files
    datasets = getDatasetsByApproxName("RawData", history, history_client)
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)
    tool_inputs = {}
    for i, dataset in enumerate(datasets + kallistoLogs):
        if not dataset["deleted"]:
            software = 'fastqc' if dataset in datasets else 'kallisto'
            params = {'id': dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software,
                                'results_%s|input_file' % i: params})
    # summarise with the MultiQC tool
    tool_client.run_tool(history['id'], 'multiqc', tool_inputs)
    multiQc = getDatasetsByApproxName("multiqc", history, history_client)[0]

    # get all the abundance files to convert to a gene-level counts matrix
    datasets = getDatasetsByApproxName(".abundance", history, history_client)
    # make a dataset collection from which to build the counts matrix
    collection = makeDataSetCollection(datasets, "abundances", history, history_client)
    tool_inputs = {'inputs': {'id': collection['id'], 'src': 'hdca'},
                   "species": argDictionary['species']}
    # convert abundances to a gene-level counts matrix
    tool_client.run_tool(history['id'], 'KallistoAbundancestoGeneCountMatrix', tool_inputs)
    # a dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    txi = getDatasetsByApproxName("txi", history, history_client)
    # set up the tool_inputs for PCA
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    tool_client.run_tool(history['id'], 'PCARNASeq', tool_inputs)
    pca = getDatasetsByApproxName("PCA", history, history_client)[0]

    # set up the tool_inputs for DESeq2 and characteristic direction
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   'comparisonsTable': {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'},
                   "foldChangeOnly": argDictionary['foldChangeOnly'],
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    tool_client.run_tool(history['id'], 'DESeq2FoldChange', tool_inputs)
    tool_client.run_tool(history['id'], 'characteristicDirectionRNASeq', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # get the fold-change data, cut it per comparison and run the pathway workflow
    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(pca["id"]),
                          'chrDirTable': getUrl(getMostRecentDatasetByName(
                              'chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = lib_data("ratStringNetwork")
            geneLengths = lib_data("ratGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Cow":
            network = lib_data("cowStringNetwork")
            geneLengths = lib_data("cowGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Horse":
            network = lib_data("horseStringNetwork")
            geneLengths = lib_data("horseGeneLengths")
            homology = lib_data("Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = lib_data("pigStringNetwork.txt")
            geneLengths = lib_data("pigGeneLengths.tabular")
            homology = lib_data("Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = lib_data("zebrafishStringNetwork")
            geneLengths = lib_data("zebrafishGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable',
                'comparisonNum', 'comparisonDenom', 'foldChange', 'interactome',
                'exonLength', 'moduleNodes', 'modulePlots', 'slimEnrichPathways',
                'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic',
                'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Mouse":
            network = lib_data("mouseStringNetwork")
            geneLengths = lib_data("MouseGeneLengths.tab")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-mouse.txt")
        else:
            network = lib_data("humanStringNetwork")
            geneLengths = lib_data("geneLengths")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-human.txt")
        pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                          '3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable',
                'comparisonNum', 'comparisonDenom', 'foldChange', 'interactome',
                'exonLength', 'moduleNodes', 'modulePlots', 'pathways',
                'enrichPlot', 'enrichmentTable', 'slimEnrichPathways',
                'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic',
                'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    import csv

    api_key = ''  # fill in
    galaxy_host = 'http://localhost:8080/'
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    def lib_data(name):
        # shorthand for the repeated library-data lookups below
        return getLibraryToolDataID(history=history, history_client=history_client,
                                    library_client=library_client, name=name)

    # was create_history(row['accessionNumber']); `row` is undefined here
    history = history_client.create_history(argDictionary['accessionNumber'])

    # import the Galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')
    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')

    # run the workflow on the csv data to create a new history
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary
    datamap = {'1': {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}
    workflow_client.invoke_workflow(workflow['id'], inputs=datamap,
                                    history_id=history['id'], params=params)
    # a dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = lib_data("ratStringNetwork")
            geneLengths = lib_data("ratGeneLengths")
            homology = lib_data("Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network = lib_data("cowStringNetwork")
            geneLengths = lib_data("cowGeneLengths")
            homology = lib_data("Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network = lib_data("horseStringNetwork")
            geneLengths = lib_data("horseGeneLengths")
            homology = lib_data("Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = lib_data("pigStringNetwork.txt")
            geneLengths = lib_data("pigGeneLengths.tabular")
            homology = lib_data("Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = lib_data("zebrafishStringNetwork")
            geneLengths = lib_data("zebrafishGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA',
                'chrDirTable', 'foldChange', 'interactome', 'exonLength',
                'moduleNodes', 'modulePlots', 'enrichmentTable',
                'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv',
                  'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        if argDictionary['species'] == "Mouse":
            network = lib_data("mouseStringNetwork")
            geneLengths = lib_data("MouseGeneLengths.tab")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-mouse.txt")
        else:
            network = lib_data("humanStringNetwork")
            geneLengths = lib_data("geneLengths")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-human.txt")
        pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                          '3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA',
                'chrDirTable', 'foldChange', 'interactome', 'exonLength',
                'moduleNodes', 'modulePlots', 'pathways', 'enrichPlot',
                'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv',
                  'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
__version__ = '0.1.0'

# import logging
# logging.basicConfig(level=logging.DEBUG)

upload_history_name = 'Uploaded data'
upload_history_tag = 'user_data'
workflow_tag = 'islandcompare'
workflow_owner = 'brinkmanlab'
application_tag = 'IslandCompare'

ext_to_datatype = {
    "genbank": "genbank",
    "gbk": "genbank",
    "embl": "embl",
    "gbff": "genbank",
    "newick": "newick",
    "nwk": "newick",
}

WorkflowClient.set_max_get_retries(5)
HistoryClient.set_max_get_retries(5)
DatasetClient.set_max_get_retries(5)
JobsClient.set_max_get_retries(5)
InvocationClient.set_max_get_retries(5)


# ======== Patched bioblend functions ===========
# TODO Remove after upgrading to v0.16.0
def get_invocations(self, workflow_id, history_id=None, user_id=None,
                    include_terminal=True, limit=None, view='collection',
                    step_details=False):
    url = self._invocations_url(workflow_id)
    params = {'include_terminal': include_terminal, 'view': view,
              'step_details': step_details}
    if history_id:
        params['history_id'] = history_id
    if user_id:
        params['user_id'] = user_id
    if limit:
        params['limit'] = limit
    return self._get(url=url, params=params)
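# The function above looks like a backport meant to be monkey-patched onto the
# bioblend client until v0.16.0 is available; one assumed way to bind it:
WorkflowClient.get_invocations = get_invocations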