def main():
    try:
        input_path = sys.argv[1]
        output_path = sys.argv[2]
        galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        historyClient = HistoryClient(galaxyInstance)
        toolClient = ToolClient(galaxyInstance)
        workflowClient = WorkflowClient(galaxyInstance)
        datasetClient = DatasetClient(galaxyInstance)

        history = historyClient.create_history('tmp')
        uploadedFile = toolClient.upload_file(input_path, history['id'])
        workflow = workflowClient.show_workflow(WORKFLOW_ID)
        # Map the uploaded dataset onto the first workflow input
        dataset_map = {list(workflow['inputs'].keys())[0]: {'id': uploadedFile['outputs'][0]['id'], 'src': 'hda'}}
        params = {TOOL_ID_IN_GALAXY: {'param': 'reference_genome', 'value': 'hg19'}}
        output = workflowClient.run_workflow(WORKFLOW_ID, dataset_map, params, history['id'])
        downloadDataset(datasetClient, findDatasedIdByExtention(datasetClient, output, 'bed'), output_path)

        # delete history
        historyClient.delete_history(history['id'])
        # if the galaxy instance supports dataset purging
        # historyClient.delete_history(history['id'], True)
    except IndexError:
        print('usage: %s input_path output_path' % os.path.basename(sys.argv[0]))
        sys.exit(1)
def get(dataset_id, history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    The return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient(gi)
    dc = DatasetClient(gi)
    file_path = '/import/%s' % dataset_id

    history_id = history_id or _get_history_id()

    # Cache the file requests. E.g. in the example of someone doing something
    # silly like a get() for a Galaxy file in a for-loop, we wouldn't want to
    # re-download every time and add that overhead.
    if not os.path.exists(file_path):
        dataset_mapping = dict([(dataset['hid'], dataset['id']) for dataset in hc.show_history(history_id, contents=True)])
        try:
            hc.download_dataset(history_id, dataset_mapping[dataset_id], file_path, use_default_filename=False, to_ext=None)
        except Exception:
            dc.download_dataset(dataset_mapping[dataset_id], file_path, use_default_filename=False)

    return file_path
def get(datasets_identifiers, identifier_type='hid', history_id=None):
    """
    Given the history_id that is displayed to the user, this function will
    download the file[s] from the history and store them under /import/.
    The return value[s] are the path[s] to the dataset[s] stored under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object version of bioblend is too slow in retrieving all datasets
    # from a history, so fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    for dataset_identifier in datasets_identifiers:
        file_path = '/import/%s' % dataset_identifier
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_identifier)
        # Cache the file requests. E.g. in the example of someone doing something
        # silly like a get() for a Galaxy file in a for-loop, we wouldn't want to
        # re-download every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if identifier_type == 'hid':
                dataset_identifier = int(dataset_identifier)
            dc.download_dataset(datasets[dataset_identifier], file_path=file_path, use_default_filename=False)
        else:
            log.debug('Cached, not re-downloading')

    return file_path
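A minimal usage sketch for the helper above (an assumption, not part of the original: it presumes the function is available in an interactive-environment container where HISTORY_ID is already set, and the hid values are placeholders):

# Hypothetical call: fetch history items 1 and 2 by hid; the files are cached under /import/.
get(['1', '2'], identifier_type='hid')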
def delete_galaxy_histories(pks, purge, user):
    hss = History.objects.filter(pk__in=pks)
    for hs in hss:
        git = hs.galaxyinstancetracking
        gi, gu = get_gi_gu(user, git)
        hc = HistoryClient(gi)
        hc.delete_history(hs.galaxy_id, purge)
        hs.delete()
def get_user_history(history_id=None):
    """
    Get all visible dataset infos of the user history.
    Return a list with one dict per dataset.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, visible=True, contents=True)
    return history
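A small usage sketch (assuming the same interactive-environment setup as above):

# Print hid and name for every visible dataset in the current history.
for ds in get_user_history():
    print(ds['hid'], ds['name'])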
def transfer_filelist_from_ftp(gi, filelist, history_name):
    tc = ToolClient(gi)
    hc = HistoryClient(gi)
    st = get_time_stamp()
    hist = hc.create_history('{}-{}'.format(history_name, st))
    uploaded_files = []
    for f in filelist:
        upf = tc.upload_from_ftp(path=os.path.basename(f), history_id=hist['id'])['outputs'][0]
        print(upf)
        uploaded_files.append(upf)
    return uploaded_files, hist
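A hedged example call (the URL, API key, and file name are placeholders; it assumes the files already sit in the user's Galaxy FTP directory):

from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance(url='https://galaxy.example.org', key='<api-key>')
uploaded, hist = transfer_filelist_from_ftp(gi, ['sample1.fastq.gz'], 'ftp-import')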
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    # print(files)
    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        # initial set
        # if itemp == 31:
        #     break
        # print("Name " + f['name'])
        replicate = f['name'].split('/')[-1][0]
        # print(replicate)
        if replicate == 'X':
            base = f['name'].split('/')[-1].split('.')[0]
            # print(base)
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)
            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            workflows = workflowsClient.get_workflows(name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap, history_id=hist['id'])
            except Exception:
                print('Next')
def check_histories(run, api_key, host, logger):
    galaxy_instance = GalaxyInstance(host, key=api_key)
    history_client = HistoryClient(galaxy_instance)
    history_json_d = run + '/output'
    histories = read_all_histories(history_json_d, logger)
    (all_successful, all_running, all_failed,
     all_except, all_waiting, upload_history) = get_history_status(histories, history_client, logger)
    return (all_successful, all_running, all_failed,
            all_except, all_waiting, upload_history)
def get_history_data(pk, user, name_filter=None, data_type=None):
    hs = History.objects.get(pk=pk)
    git = hs.galaxyinstancetracking
    gi, gu = get_gi_gu(user, git)
    hc = HistoryClient(gi)
    hdatasets = hc.show_matching_datasets(hs.galaxy_id)

    if data_type:
        hdatasets = [h for h in hdatasets if h['extension'] in data_type]

    if name_filter:
        hdatasets = [h for h in hdatasets if h['name'] in name_filter]

    for h in hdatasets:
        h['galaxy_instance'] = git.name
        h['galaxy_instance_id'] = git.pk
        h['history_internal_id'] = pk

    return hdatasets
def get(dataset_id):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    The return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection()
    hc = HistoryClient(gi)
    dc = DatasetClient(gi)
    file_path = '/import/%s' % dataset_id

    dataset_mapping = dict([(dataset['hid'], dataset['id']) for dataset in hc.show_history(conf['history_id'], contents=True)])
    try:
        hc.download_dataset(conf['history_id'], dataset_mapping[dataset_id], file_path, use_default_filename=False, to_ext=None)
    except Exception:
        dc.download_dataset(dataset_mapping[dataset_id], file_path, use_default_filename=False)
    return file_path
def get_workflow_status(user):
    # go through every galaxy instance
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    dj_wfs = Workflow.objects.all()

    # loop through instances
    status = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        hc = HistoryClient(gi)
        wfs = wc.get_workflows()
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            winvoke = wc.get_invocations(wf['id'])
            for wi in winvoke:
                wid = wc.show_invocation(wf['id'], wi['id'])
                h_l = hc.get_histories(wid['history_id'], deleted=True)
                if h_l:
                    h = h_l[0]
                else:
                    continue
                sd = get_status_d(wid)
                sd['name'] = wfd['name']
                hd = hc.show_history(h['id'])
                sd['history_name'] = h['name']
                datetime_object = datetime.strptime(hd['update_time'], '%Y-%m-%dT%H:%M:%S.%f')
                # sd['history_url'] = '{}{}'.format(git.url, hd['url'])
                sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                sd['update_time_unix'] = unixtime(datetime_object)
                sd['galaxy_instance'] = git.name
                status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
def get(dataset_id, history_id=None, use_objects=DEFAULT_USE_OBJECTS):
    """
    Given the history_id that is displayed to the user, this function will
    download the file from the history and store it under /import/.
    The return value is the path to the dataset stored under /import/.
    """
    conf = _get_conf()
    gi = get_galaxy_connection(use_objects)
    file_path = '/import/%s' % dataset_id

    history_id = history_id or _get_history_id()

    # Cache the file requests. E.g. in the example of someone doing something
    # silly like a get() for a Galaxy file in a for-loop, we wouldn't want to
    # re-download every time and add that overhead.
    if not os.path.exists(file_path):
        if use_objects:
            history = gi.histories.get(history_id)
            datasets = dict([(d.wrapped["hid"], d.id) for d in history.get_datasets()])
            dataset = history.get_dataset(datasets[dataset_id])
            dataset.download(open(file_path, 'wb'))
        else:
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            dataset_mapping = dict([
                (dataset['hid'], dataset['id'])
                for dataset in hc.show_history(history_id, contents=True)
            ])
            try:
                hc.download_dataset(history_id, dataset_mapping[dataset_id], file_path, use_default_filename=False, to_ext=None)
            except Exception:
                dc.download_dataset(dataset_mapping[dataset_id], file_path, use_default_filename=False)

    return file_path
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(hist_id, name_filter=name_filter)
        if len(countSecondary) != 0:
            # print(countSecondary)
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            # print(file_path)
            # print(countSecondary[0]['dataset_id'])
            datasetClient.download_dataset(countSecondary[0]['id'], file_path=file_path, use_default_filename=False)
    sys.exit()
def init_history_data_save_form(user, history_internal_id, galaxy_dataset_id):
    h = History.objects.get(pk=history_internal_id)
    gi, gu = get_gi_gu(user, h.galaxyinstancetracking)

    # save temp history object
    hc = HistoryClient(gi)
    history_d = hc.show_dataset(history_id=h.galaxy_id, dataset_id=galaxy_dataset_id)
    history_d['full_download_url'] = h.galaxyinstancetracking.url + history_d['download_url']
    history_d['abs_pth'] = ''

    data_pth = history_d['file_name'].replace('/export/', '')
    fullpth = os.path.join(h.galaxyinstancetracking.galaxy_root_path, data_pth)
    if os.path.exists(fullpth):
        history_d['abs_pth'] = fullpth

    print('ABS_PTH {}'.format(history_d['abs_pth']))

    return history_d
parser.add_argument("-e", "--endpoint") parser.add_argument("-p", "--port") parser.add_argument("-s", "--sourcedir") args = parser.parse_args() host = "127.0.0.1" if not args.endpoint else args.endpoint port = "8080" addr = host + ":{}".format(port) if port else "" apik = args.apikey gi = GalaxyInstance(addr, apik) lc = LibraryClient(gi) fc = FoldersClient(gi) hc = HistoryClient(gi) library_name = "GDC Files" library_description = "A library of files acquired from the NCI Genomic Data Commons (GDC)" libs=lc.get_libraries() lib = {} if libs and isinstance(libs, dict): libs = [libs] if libs: for _lib in libs: if "name" in _lib and _lib["name"] == library_name: lib = _lib else: lib = lc.create_library(library_name, library_description) print("Library {} created:\n{}".format(library_name, lib))
def get_workflow_inputs(l, pkd, gi, git, history_name, library):
    # Galaxy input sources: LibraryDatasetDatasetAssociation (ldda), LibraryDataset (ld),
    # HistoryDatasetAssociation (hda), or HistoryDatasetCollectionAssociation (hdca).
    st = get_time_stamp()
    hc = HistoryClient(gi)

    worklow_inputs_d = {}

    for table, filter, dinput_name, dinput_step, dinput_type in l:
        pks = pkd[str(table.prefix)]
        # We will get multiple inputs here because there can be multiple galaxyfilelinks
        # per file. They all point at the same file, so we just take the distinct set.
        selected_objects = GenericFile.objects.filter(pk__in=pks).distinct()
        print('PKS', pks, dinput_type)
        print(selected_objects)

        if dinput_type == 'data_input':
            # Can only use the first selection (a data collection is needed for multiple
            # files; this approach does not support 'multiple files' as input, as that is
            # not possible with BioBlend (I think)).
            s = selected_objects[0]
            gid = s.galaxyfilelink_set.filter(galaxy_library=library)[0].galaxy_id
            print(gid)
            worklow_inputs_d[dinput_step] = {'id': gid, 'src': 'ld'}
        elif dinput_type == 'data_collection_input':
            element_identifiers = []
            hist = hc.create_history('{}-(data-history-{})-{}'.format(history_name, dinput_name, st))
            for s in selected_objects:
                print(s)
                gfl = s.galaxyfilelink_set.filter(galaxy_library=library)[0]
                if library:
                    dataset = hc.upload_dataset_from_library(hist['id'], lib_dataset_id=gfl.galaxy_id)
                    element_identifiers.append({
                        'id': dataset['id'],
                        'name': os.path.basename(dataset['file_name']),
                        'src': 'hda'
                    })
                else:
                    element_identifiers.append({
                        'id': gfl.galaxy_id,
                        'name': gfl.genericfile.data_file.name,
                        'src': 'hda'
                    })

            c_descript = {
                'collection_type': 'list',
                'element_identifiers': element_identifiers,
                'name': dinput_name,
            }
            dc = hc.create_dataset_collection(hist['id'], c_descript)
            worklow_inputs_d[dinput_step] = {'id': dc['id'], 'src': 'hdca'}

    return worklow_inputs_d
__version__ = '0.1.0'

# import logging
# logging.basicConfig(level=logging.DEBUG)

upload_history_name = 'Uploaded data'
upload_history_tag = 'user_data'
workflow_tag = 'islandcompare'
workflow_owner = 'brinkmanlab'
application_tag = 'IslandCompare'

ext_to_datatype = {
    "genbank": "genbank",
    "gbk": "genbank",
    "embl": "embl",
    "gbff": "genbank",
    "newick": "newick",
    "nwk": "newick",
}

WorkflowClient.set_max_get_retries(5)
HistoryClient.set_max_get_retries(5)
DatasetClient.set_max_get_retries(5)
JobsClient.set_max_get_retries(5)
InvocationClient.set_max_get_retries(5)


# ======== Patched bioblend functions ===========
# TODO Remove after upgrading to v0.16.0
def get_invocations(self, workflow_id, history_id=None, user_id=None, include_terminal=True,
                    limit=None, view='collection', step_details=False):
    url = self._invocations_url(workflow_id)
    params = {'include_terminal': include_terminal, 'view': view, 'step_details': step_details}
    if history_id:
        params['history_id'] = history_id
    if user_id:
        params['user_id'] = user_id
    if limit:
        params['limit'] = limit
    return self._get(url=url, params=params)
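How the patched function is attached is not shown here; one plausible wiring (an assumption, not confirmed by the snippet) is to bind it onto the client class:

# Hypothetical: replace the bioblend method with the patched version so existing
# call sites such as workflow_client.get_invocations(...) pick it up unchanged.
WorkflowClient.get_invocations = get_invocations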
def getGalaxyData(accession, dataType, species, foldChangeOnly): api_key = 'ENTER_API_KEY' galaxy_host = 'http://localhost:8080' gi = GalaxyInstance(url=galaxy_host, key=api_key) history_client = HistoryClient(gi) dataDirectory = "Sybil/Shiny/data/" + accession if not os.path.exists(dataDirectory): os.makedirs(dataDirectory) wwwDirectory = "Shiny/www/microarrayQC.html/" + accession if not os.path.exists(wwwDirectory): os.makedirs(wwwDirectory) wwwDirectoryPlots = "Shiny/www/plots/" + accession if not os.path.exists(wwwDirectoryPlots): os.makedirs(wwwDirectoryPlots) #get the most recent history history = history_client.get_histories(name=accession)[0] #get the experiment level data getPCA(history, history_client, dataDirectory, galaxy_host) getChrDirTable(history, history_client, dataDirectory, galaxy_host) if dataType == "Microarray": getQC(history, history_client, wwwDirectory, galaxy_host) comparisons = getComparisonsTable(history, history_client, dataDirectory, galaxy_host) number_of_comparisons = -1 for line in open(comparisons): if not line.isspace(): number_of_comparisons += 1 if foldChangeOnly == "FALSE": pvalues = ["1", "0.05"] foldchanges = ["1", "1.5", "2"] thresholds = list(itertools.product(pvalues, foldchanges)) thresholds.pop(0) else: pvalues = ["1"] foldchanges = ["1.5", "2"] thresholds = list(itertools.product(pvalues, foldchanges)) for i in reversed(range(number_of_comparisons)): getFoldChange(i, history, history_client, dataDirectory, galaxy_host) for index, values in reversed( list( enumerate( list( itertools.product(range(number_of_comparisons), thresholds))))): (comparison, (pvalue, foldchange)) = values print(index) print(values) getStringNetworks(index, comparison, history, history_client, dataDirectory, galaxy_host) getBioGridNetworks(index, comparison, history, history_client, dataDirectory, galaxy_host) getPathways(index, comparison, pvalue, foldchange, history, history_client, dataDirectory, galaxy_host) getDrugEnrichment(index, comparison, pvalue, foldchange, history, history_client, dataDirectory, galaxy_host) getGOEnrichment(index, comparison, pvalue, foldchange, history, history_client, dataDirectory, wwwDirectoryPlots, galaxy_host) if species in ["Human", "Mouse"]: getTFs(index, comparison, pvalue, foldchange, history, history_client, dataDirectory, galaxy_host)
def runWorkflow(argDictionary, comparisons,samples): from bioblend.galaxy import GalaxyInstance from bioblend.galaxy.histories import HistoryClient from bioblend.galaxy.tools import ToolClient from bioblend.galaxy.workflows import WorkflowClient from bioblend.galaxy.libraries import LibraryClient import tempfile import time api_key = '' galaxy_host = '' gi = GalaxyInstance(url=galaxy_host, key=api_key) history_client = HistoryClient(gi) tool_client = ToolClient(gi) workflow_client = WorkflowClient(gi) library_client = LibraryClient(gi) history = history_client.create_history(argDictionary['accessionNumber']) comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt') sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular') if argDictionary['site'] == "ENA": #fastqs available on ENA tool_inputs = { "accessionNumber":argDictionary["ENA"],"sampleTable":{'id': sampleTable['outputs'][0]['id'], 'src': 'hda'} } #run the tool to get the data from ENA tool_client.run_tool(history['id'],'getRNASeqExpressionData', tool_inputs) #we want to wait until we have all datasets while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) #sleep until all the fastq files are findable time.sleep(120) dirpath = tempfile.mkdtemp() fileList = getDatasetsByApproxName("files.tabular", history,history_client)[0] fileList = history_client.download_dataset(history["id"],fileList["id"],dirpath) num_lines = sum(1 for line in open(fileList)) -1 datasets=list() while len(datasets)!=num_lines: time.sleep(10) datasets = getDatasetsByApproxName("fastq",history,history_client ) else: #for SRA if argDictionary['single'] == "TRUE": with open(samples) as tsvfile: reader = csv.DictReader(tsvfile, delimiter='\t') for sample in reader: print (sample) fileNames=str.split(sample["File"],"|") for fileName in fileNames: tool_inputs = { "input|input_select":"accession_number", "outputformat":"fastqsanger.gz", "input|accession":fileName } #run the tool to get the single data from SRA tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3', tool_inputs) else: with open(samples) as tsvfile: reader = csv.DictReader(tsvfile, delimiter='\t') for sample in reader: tool_inputs = { "accession_number":sample["File"] } #run the tool to get the paired data from SRA tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4', tool_inputs) while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) datasets = getDatasetsByApproxName("fastq",history,history_client ) #get the fastQC tool for fastq in datasets: try: tool_inputs = {'input_file' : {'id': fastq['id'], 'src': 'hda'}} tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69', tool_inputs) except Exception: pass #wait till complete while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) #make dataset collections for quantification using the fastq files collections=list() with open(samples) as tsvfile: reader = csv.DictReader(tsvfile, delimiter='\t') for row in reader: datasets=list() fileNames=str.split(row["File"],"|") for fileName in fileNames: datasets= datasets + getDatasetsByApproxName(fileName,history,history_client ) #make list of datasets collections.append(makeDataSetCollection(datasets,row["Sample"],history,history_client)) #get the correct kallisto index species = argDictionary['species'].lower() index = 
getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name=species +"IndexFile") index = {'id': index, 'src': 'hda'} #run kallisto for every dataset collection for collection in collections: #set up the tool_inputs tool_inputs = {'index' : index,'inputs' : {'id': collection['id'], 'src': 'hdca'} ,"single":argDictionary["single"],"stranded":argDictionary["stranded"]} #often encounter connection broken error - possible problem with Certus server? #bypass by ignoring the exception tool_client.run_tool(history['id'],'kallistoQuant', tool_inputs) # we want to wait until we have all datasets while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) # Run multiqc on kallisto logs and fastqc files datasets = getDatasetsByApproxName("RawData",history,history_client ) kallistoLogs = getDatasetsByApproxName(".log", history, history_client) tool_inputs = {} for i, dataset in enumerate(datasets+kallistoLogs): if not dataset["deleted"]: if dataset in datasets: software = 'fastqc' else: software = 'kallisto' params = {'id' : dataset['id'], 'src': 'hda', 'name': dataset['name']} tool_inputs.update({'results_%s|software_cond|software' % i: software, 'results_%s|input_file' % i: params}) # #summarise with the multiQC tool tool_client.run_tool(history['id'],'multiqc', tool_inputs) multiQc = getDatasetsByApproxName("multiqc",history,history_client)[0] #get all the abundance files to convert to gene level counts matrix datasets = getDatasetsByApproxName(".abundance",history,history_client ) #make a dataset collection for to make a countsMatrix collection = makeDataSetCollection(datasets,"abundances",history,history_client) #set up the tool_inputs tool_inputs = {'inputs' : {'id': collection['id'], 'src': 'hdca'} ,"species":argDictionary['species']} #convert abundances to gene level counts matrix tool_client.run_tool(history['id'],'KallistoAbundancestoGeneCountMatrix', tool_inputs) # A diry hack, we want to wait until we have all datasets while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) txi = getDatasetsByApproxName("txi",history,history_client) #set up the tool_inputs for PCA tool_inputs = {'txiData' : {'id': txi[0]['id'], 'src': 'hda'} ,'sampleTable' : {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'} ,"species":argDictionary['species'],'technicalReplicates':argDictionary['technicalReplicates'],'batchCorrect':argDictionary['batchCorrect']} #run deseq2 tool_client.run_tool(history['id'],'PCARNASeq', tool_inputs) pca = getDatasetsByApproxName("PCA",history,history_client)[0] #set up the tool_inputs for DESeq2 tool_inputs = {'txiData' : {'id': txi[0]['id'], 'src': 'hda'} ,'sampleTable' : {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'} , 'comparisonsTable' : {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'} ,"foldChangeOnly":argDictionary['foldChangeOnly'],"species":argDictionary['species'],'technicalReplicates':argDictionary['technicalReplicates'],'batchCorrect':argDictionary['batchCorrect']} #run deseq2 tool_client.run_tool(history['id'],'DESeq2FoldChange', tool_inputs) #run chrdir tool_client.run_tool(history['id'],'characteristicDirectionRNASeq', tool_inputs) #we want to wait until we have all datasets while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) #get the foldchange data, cut and run pathway workflow dataset_id = getFoldChangeData(history, history_client)['id'] return_collection = [{'accessionNo':argDictionary['accessionNumber'], 'foldChange': getUrl(dataset_id), 
'PCA': getUrl(pca["id"]),'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}] number_of_comparisons = -1 for line in open(comparisons): if not line.isspace(): number_of_comparisons += 1 for comparison in range(0, int(number_of_comparisons)): tool_inputs = { 'foldChangeTable' : {'id': dataset_id, 'src': 'hda'}, 'comparisonNumber' : comparison + 1 } tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs) while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) if argDictionary['species'] in ["Rat","Cow","Horse","Pig","Zebrafish"]: pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1') params = dict() for key in pathwayAnalysisWorkflow['steps'].keys(): params[key] = argDictionary if argDictionary['species'] == "Rat": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt") if argDictionary['species'] == "Cow": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt") if argDictionary['species'] == "Horse": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.horse.txt") if argDictionary['species'] == "Pig": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigStringNetwork.txt") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigGeneLengths.tabular") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.pig.txt") if argDictionary['species'] == "Zebrafish": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt") pathwayDatamap = {'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client) for index, diffExpData in enumerate(diffExpDataCollection): numCompleted = getNumberComplete(history['id'], history_client) + 10 print(numCompleted) pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'} workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs = pathwayDatamap, history_id = history['id'], params = 
params) comparisonDict = getRowFromCsv(comparisons, index) if 'Factor1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2'] return_dict = {'accessionNo':argDictionary['accessionNumber'], 'factor':comparisonDict['Factor'], 'comparisonNum':comparisonDict['Numerator'], 'comparisonDenom':comparisonDict['Denominator'], 'foldChange': getUrl(diffExpData['id']), 'interactome': pathwayDatamap['0']['id'], 'exonLength': pathwayDatamap['2']['id']} while getNumberComplete(history['id'], history_client) < numCompleted: time.sleep(10) return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id']) return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id']) return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id']) return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id']) return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id']) return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id']) return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id']) return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id']) return_collection.append(return_dict) # Hard code keys to define the order keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange', 'interactome','exonLength','moduleNodes','modulePlots', 'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS'] outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv' with open(outFileName, 'wb') as csvFile: # Get headers from last dictionary in collection as first doesn't contain all keys csvOutput = csv.DictWriter(csvFile, keys, delimiter = "\t") csvOutput.writeheader() csvOutput.writerows(return_collection) #tool_client.upload_file(outFileName, history['id'], file_type='tsv') return return_collection else: pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b') params = dict() for key in pathwayAnalysisWorkflow['steps'].keys(): params[key] = argDictionary if argDictionary['species'] == "Mouse": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt") secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt") pathwayDatamap = {'4' : {'id': secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} else: network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork") 
geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt") secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt") pathwayDatamap = {'4' : {'id': secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client) for index, diffExpData in enumerate(diffExpDataCollection): numCompleted = getNumberComplete(history['id'], history_client) + 14 print(numCompleted) pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'} #pathwayDatamap['1'] = {'id': diffExpData['id'], 'src': 'hda'} workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs = pathwayDatamap, history_id = history['id'], params = params) comparisonDict = getRowFromCsv(comparisons, index) if 'Factor1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2'] return_dict = {'accessionNo':argDictionary['accessionNumber'], 'factor':comparisonDict['Factor'], 'comparisonNum':comparisonDict['Numerator'], 'comparisonDenom':comparisonDict['Denominator'], 'foldChange': getUrl(diffExpData['id']), 'interactome': pathwayDatamap['0']['id'], 'exonLength': pathwayDatamap['2']['id']} while getNumberComplete(history['id'], history_client) < numCompleted: time.sleep(10) return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id']) return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id']) return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id']) return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id']) return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id']) return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id']) return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular', history, history_client)['id']) return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id']) return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id']) return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id']) return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id']) return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id']) return_collection.append(return_dict) # Hard code keys to define the order keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange', 'interactome','exonLength','moduleNodes','modulePlots','pathways','enrichPlot', 'enrichmentTable', 
'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS'] outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv' with open(outFileName, 'wb') as csvFile: # Get headers from last dictionary in collection as first doesn't contain all keys csvOutput = csv.DictWriter(csvFile, keys, delimiter = "\t") csvOutput.writeheader() csvOutput.writerows(return_collection) return return_collection
conn = sqlite3.connect(db_path)

# In[8]:
api_key = open('../galaxy_api_key').read()
galaxy_url = 'http://localhost:8080'
gi = GalaxyInstance(galaxy_url, api_key)

# ## alternative: load workflows from local files
local_workflow_folder = '../tool-suggestion-engine/shared-workflows'
list_of_wo = !ls $local_workflow_folder/*.ga
for file in list_of_wo:
    gi.workflows.import_workflow_from_local_path(file)

# ## alternative: load histories from files and convert histories to workflows
hi = HistoryClient(gi)

# download histories to a binary file
# for id in [item['id'] for item in hi.get_histories()]:
#     hi.download_history(id, hi.export_history(id), open(str(id) + '.history', 'w'), chunk_size=4096)

# upload binary history to database
# print "not supported by the API"

# ## clear workflow table
for w in gi.workflows.get_workflows():
    gi.workflows.delete_workflow(w['id'])

# Display existing workflows
!scripts/api/./display.py $(cat ../galaxy_api_key) http://localhost:8080/api/workflows

# ## alternative: upload histories and workflows downloaded with SQL
histo_read = pd.read_csv('histories.csv')
#!/usr/bin/env python
"""
Use the bioblend API to create a fresh history and add a set of files to the
history that were imported into the container during the build.

Usage: create_and_upload_history.py history_name url1 url2 url3 ...
"""
import sys

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.histories import HistoryClient
from bioblend.galaxy.tools import ToolClient

gi = GalaxyInstance(url='http://localhost:80', key='admin')
tc = ToolClient(gi)
hc = HistoryClient(gi)

details = hc.create_history(sys.argv[1])
print("HIST ID: %s" % details["id"])

# Skip argv[0] (script name) and argv[1] (history name); the remaining
# arguments are the URLs to upload.
i = 0
for url in sys.argv:
    url_parts = url.split("/")
    fname = url_parts[-1]
    if i < 2:
        i += 1
        continue
    i += 1
    print("submitting %s as %s" % (url, fname))
    tc.put_url(url, details["id"], file_name=fname)
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print("You passed %s, I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')
file_name_re = re.compile(parser.get('Globals', 'sample_re'))

galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
historyClient = HistoryClient(galaxyInstance)
toolClient = ToolClient(galaxyInstance)
workflowClient = WorkflowClient(galaxyInstance)
dataSetClient = DatasetClient(galaxyInstance)

files = get_files(parser.get('Globals', 'fastq_dir'))
if len(files) == 0:
    print("Not able to find any fastq files, looked in %s" % (parser.get('Globals', 'fastq_dir')))
else:
    print("Found fastq files, running workflow for the following files (R2's will be added)")
    print(",".join(files))
    files_to_keep = {}
    for R1 in files:
        input_dir_path = os.path.dirname(R1) + "/"
        R2 = R1.replace('R1', 'R2')
        if not os.path.exists(R1):
def runWorkflow(argDictionary, comparisons): from bioblend.galaxy import GalaxyInstance from bioblend.galaxy.histories import HistoryClient from bioblend.galaxy.tools import ToolClient from bioblend.galaxy.workflows import WorkflowClient from bioblend.galaxy.libraries import LibraryClient import time api_key = '' galaxy_host = 'http://localhost:8080/' gi = GalaxyInstance(url=galaxy_host, key=api_key) history_client = HistoryClient(gi) tool_client = ToolClient(gi) workflow_client = WorkflowClient(gi) library_client = LibraryClient(gi) history = history_client.create_history(row['accessionNumber']) # Import the galaxy workflow workflow = workflow_client.show_workflow('a799d38679e985db') input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt') # Run workflow on csv data to create a new history. params = dict() for key in workflow['steps'].keys(): params[key] = argDictionary datamap = {'1' : {'id': input_file['outputs'][0]['id'], 'src': 'hda'}} workflow_client.invoke_workflow(workflow['id'], inputs = datamap, history_id = history['id'], params = params) # A diry hack, we want to wait until we have all datasets while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) dataset_id = getFoldChangeData(history, history_client)['id'] return_collection = [{'accessionNo':argDictionary['accessionNumber'], 'foldChange': getUrl(dataset_id), 'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}] number_of_comparisons = -1 for line in open(comparisons): if not line.isspace(): number_of_comparisons += 1 for comparison in range(0, int(number_of_comparisons)): tool_inputs = { 'foldChangeTable' : {'id': dataset_id, 'src': 'hda'}, 'comparisonNumber' : comparison + 1 } tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs) while getNumberNotComplete(history['id'], history_client) > 0: time.sleep(10) if argDictionary['species'] in ["Rat","Cow","Horse","Pig","Zebrafish"]: pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1') params = dict() for key in pathwayAnalysisWorkflow['steps'].keys(): params[key] = argDictionary if argDictionary['species'] == "Rat": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.rat.txt") if argDictionary['species'] == "Cow": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.cow.txt") if argDictionary['species'] == "Horse": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.horse.txt") 
if argDictionary['species'] == "Pig": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigStringNetwork.txt") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigGeneLengths.tabular") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.pig.txt") if argDictionary['species'] == "Zebrafish": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishGeneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt") pathwayDatamap = {'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client) for index, diffExpData in enumerate(diffExpDataCollection): numCompleted = getNumberComplete(history['id'], history_client) + 10 print(numCompleted) pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'} workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs = pathwayDatamap, history_id = history['id'], params = params) comparisonDict = getRowFromCsv(comparisons, index) if 'Factor1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2'] if 'Paired1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Paired1'] return_dict = {'accessionNo':argDictionary['accessionNumber'], 'factor':comparisonDict['Factor'], 'comparisonNum':comparisonDict['Numerator'], 'comparisonDenom':comparisonDict['Denominator'], 'foldChange': getUrl(diffExpData['id']), 'interactome': pathwayDatamap['0']['id'], 'exonLength': pathwayDatamap['2']['id']} while getNumberComplete(history['id'], history_client) < numCompleted: time.sleep(10) return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id']) return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id']) return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id']) return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id']) return_collection.append(return_dict) # Hard code keys to define the order keys = ['accessionNo','factor','comparisonNum','comparisonDenom','PCA','chrDirTable','foldChange', 'interactome','exonLength','moduleNodes','modulePlots','enrichmentTable','slimEnrichmentPathways','slimEnrichmentPlot'] with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'wb') as csvFile: # Get headers from last dictionary in collection as first doesn't contain all keys csvOutput = csv.DictWriter(csvFile, keys) csvOutput.writeheader() csvOutput.writerows(return_collection) return return_collection else: pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b') params = dict() for key in pathwayAnalysisWorkflow['steps'].keys(): params[key] = argDictionary # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow if 
argDictionary['species'] == "Mouse": network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt") secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt") pathwayDatamap = {'4' : {'id': secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} else: network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork") geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths") homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt") secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt") pathwayDatamap = {'4' : {'id': secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}} diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client) for index, diffExpData in enumerate(diffExpDataCollection): numCompleted = getNumberComplete(history['id'], history_client) + 14 print(numCompleted) pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'} workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], inputs = pathwayDatamap, history_id = history['id'], params = params) comparisonDict = getRowFromCsv(comparisons, index) if 'Factor1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Factor1'] + "." 
+ comparisonDict['Factor2'] if 'Paired1' in comparisonDict.keys(): comparisonDict['Factor'] = comparisonDict['Paired1'] return_dict = {'accessionNo':argDictionary['accessionNumber'], 'factor':comparisonDict['Factor'], 'comparisonNum':comparisonDict['Numerator'], 'comparisonDenom':comparisonDict['Denominator'], 'foldChange': getUrl(diffExpData['id']), 'interactome': pathwayDatamap['0']['id'], 'exonLength': pathwayDatamap['2']['id']} while getNumberComplete(history['id'], history_client) < numCompleted: time.sleep(10) return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id']) return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id']) return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id']) return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id']) return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id']) return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id']) return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id']) return_collection.append(return_dict) # Hard code keys to define the order keys = ['accessionNo','factor','comparisonNum','comparisonDenom','PCA','chrDirTable','foldChange', 'interactome','exonLength','moduleNodes','modulePlots','pathways','enrichPlot','enrichmentTable','slimEnrichmentPathways','slimEnrichmentPlot'] with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'wb') as csvFile: # Get headers from last dictionary in collection as first doesn't contain all keys csvOutput = csv.DictWriter(csvFile, keys) csvOutput.writeheader() csvOutput.writerows(return_collection) return return_collection
#!/usr/bin/env python
import os
import shutil

import galaxy_ie_helpers
from bioblend.galaxy.histories import HistoryClient

hid = os.environ.get('DATASET_HID', None)
history_id = os.environ['HISTORY_ID']

if hid not in ('None', None):
    galaxy_ie_helpers.get(int(hid))
    shutil.copy('/import/%s' % hid, '/import/ipython_galaxy_notebook.ipynb')

additional_ids = os.environ.get("ADDITIONAL_IDS", "")
if additional_ids:
    gi = galaxy_ie_helpers.get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    history = hc.show_history(history_id, contents=True)
    additional_ids = additional_ids.split(",")
    for hda in history:
        if hda["id"] in additional_ids:
            galaxy_ie_helpers.get(int(hda["hid"]))
    return url + argsep + '&'.join(['='.join(t) for t in args])


if __name__ == '__main__':
    # GET PATH NAMES AND EXTENSIONS FROM COMMAND LINE INPUT
    input_file_full = sys.argv[1]
    input_file_format = input_file_full[input_file_full.rfind(".") + 1:len(input_file_full)]
    output_file_full = sys.argv[2]
    output_file_format = output_file_full[output_file_full.rfind(".") + 1:len(output_file_full)]

    # CHOOSE CONVERTER
    tool_id = choose_converter(input_file_format, output_file_format)

    # INITIALIZE GALAXY
    galaxy_instance = GalaxyInstance(url=base_url, key=apikey)
    history_client = HistoryClient(galaxy_instance)
    tool_client = ToolClient(galaxy_instance)
    dataset_client = DatasetClient(galaxy_instance)
    history = history_client.create_history('tmp')

    # UPLOAD FILES
    input_file_1 = tool_client.upload_file(input_file_full, history['id'], type='txt')
    input_file_2 = tool_client.upload_file(input_file_full, history['id'], type='txt')
    params = {
        'input_numbers_001': {'src': 'hda', 'id': input_file_1['outputs'][0]['id']},
        'input_numbers_002': {'src': 'hda', 'id': input_file_2['outputs'][0]['id']},
    }
    wait_4_process(history['id'], "uploading files")

    # RUN CONVERSION
    runtool_output = tool_client.run_tool(history_id=history['id'], tool_id=tool_id, tool_inputs=params)
    wait_4_process(history['id'], "running tool")

    # DOWNLOAD CONVERTED FILE
class GalaxyHandler:
    '''
    This class represents a Galaxy instance and provides functions to interact
    with that instance.
    '''

    def __init__(self, url, api_key, container_file=None, oci_bundle=False):
        self.url = url
        self.api_key = api_key
        self.container_file = container_file
        self.oci_bundle = oci_bundle
        # Bioblend GalaxyInstance
        self.instance = None
        # Whether the containerized instance is currently running
        self.instance_running = False
        # Bioblend clients
        self.user_client = None
        self.config_client = None
        self.workflow_client = None
        self.tool_client = None
        self.toolshed_client = None
        self.library_client = None
        self.roles_client = None
        self.history_client = None
        self.dataset_client = None

    def start_container_galaxy(self, writable=False, binds=None):
        '''
        Run a containerized Galaxy instance.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                subprocess.call([
                    "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                    "--pid-file", "/output/paster.pid", "--daemon"
                ], stdout=FNULL, stderr=subprocess.STDOUT)
            else:
                if writable:
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w", self.container_file,
                        "sh", "/galaxy/run.sh", "--daemon"
                    ], stdout=FNULL, stderr=subprocess.STDOUT)
                elif binds:
                    subprocess.call([
                        "singularity", "exec", "--bind", binds, self.container_file,
                        "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                        "--pid-file", "/output/paster.pid", "--daemon"
                    ], stdout=FNULL, stderr=subprocess.STDOUT)
                else:
                    subprocess.call([
                        "singularity", "exec", self.container_file,
                        "sh", "/galaxy/run.sh", "--daemon"
                    ], stdout=FNULL, stderr=subprocess.STDOUT)

        # Wait until the Galaxy instance is available, but do not wait longer than 1 minute
        response = None
        t = 0
        while not response:
            try:
                # Returns 200 if Galaxy is up
                response = urllib.urlopen(self.url).getcode()
            except Exception:
                if t > 60:
                    logger.error(
                        "Galaxy is not up after 1 minute. Something went wrong. "
                        "Maybe the container is corrupted. Try to open a shell in "
                        "writable mode in the container and start Galaxy from the shell")
                    exit(1)
                else:
                    # Wait 5 seconds until Galaxy is up
                    logger.info("Galaxy is not up ... wait 5 seconds and try again")
                    t = t + 5
                    time.sleep(5)
                    response = None
                    continue
        self.instance_running = True
        return

    def stop_container_galaxy(self, sudo=False, bind_dirs=None, tmp_dir=None):
        '''
        Stop a running containerized Galaxy instance and remove an existing
        temporary directory.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                # No binds, no Singularity, just plain run.sh --stop-daemon
                subprocess.call(["sh", "/galaxy/run.sh", "--stop-daemon"],
                                stdout=FNULL, stderr=subprocess.STDOUT)
                self.instance_running = False
                time.sleep(5)
            else:
                if sudo:
                    # We use sudo only for importing workflows, so no binds.
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w", self.container_file,
                        "sh", "/galaxy/run.sh", "--stop-daemon"
                    ], stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
                else:
                    # We use this only for workflow execution
                    subprocess.call([
                        "singularity", "exec", "--bind", bind_dirs, self.container_file,
                        "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                        "--pid-file", "/output/paster.pid", "--stop-daemon"
                    ], stdout=FNULL, stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
        # Remove temporary directories
        if tmp_dir:
            logger.info("Remove temporary directory: %s", tmp_dir)
            shutil.rmtree(tmp_dir)
        return

    def create_galaxy_instance(self, check_admin=False):
        '''
        Create a bioblend GalaxyInstance. If check_admin is True, check whether
        the user is an admin of the Galaxy instance; if not, return None.
        Returns False if an error occurs.
        '''
        # Check if the URL is valid
        if not check_url(self.url):
            logger.error("URL to Galaxy instance is not a valid URL: %s", self.url)
            return False
        # Try to create a bioblend GalaxyInstance
        try:
            self.instance = GalaxyInstance(url=self.url, key=self.api_key)
        except Exception:
            logger.error("Cannot create Galaxy instance.")
            return False
        return True

    def create_clients(self):
        '''
        Create bioblend clients for the Galaxy instance.
        '''
        # Create the first client and check whether the API works
        self.config_client = ConfigClient(self.instance)
        try:
            self.config_client.get_version()
            self.config_client.get_config()
        except Exception:
            logger.error("Provided API key does not work.")
            return False
        try:
            self.user_client = UserClient(self.instance)
            self.workflow_client = WorkflowClient(self.instance)
            self.tool_client = ToolClient(self.instance)
            self.toolshed_client = ToolShedClient(self.instance)
            self.library_client = LibraryClient(self.instance)
            self.roles_client = RolesClient(self.instance)
            self.history_client = HistoryClient(self.instance)
            self.dataset_client = DatasetClient(self.instance)
        except Exception:
            logger.error("Error initializing other bioblend clients.")
            return False
        return True

    def initialize(self):
        '''
        Initialize the bioblend GalaxyInstance and clients, and check that the
        API works. Returns False if something went wrong.
        '''
        if not self.create_galaxy_instance():
            logger.error("Cannot create bioblend GalaxyInstance for the GalaxyHandler")
            return False
        if not self.create_clients():
            logger.error("Cannot create bioblend clients for the GalaxyHandler")
            return False
        return True

    def create_user(self, name, mail, password):
        '''
        Create a new Galaxy user for a specific Galaxy instance.
        Return the user id and an API key.
        '''
        try:
            new_user = self.user_client.create_local_user(name, mail, password)
        except ConnectionError as e:
            # User already exists
            if "already exists" in e.body:
                new_user = self.user_client.get_users(f_email=mail)[0]
        new_user_id = new_user['id']
        # Create an API key for that user
        new_user_api_key = self.user_client.create_user_apikey(new_user_id)
        return (new_user_id, new_user_api_key)

    def create_input_library(self, name, user):
        '''
        Create a dataset library for this instance.
        '''
        try:
            # Create the library
            new_library = self.library_client.create_library(name, description=None, synopsis=None)
            logger.info("new_library ok")
            # Get the role of the user
            user_role_id = self.roles_client.get_roles()[0]['id']
            logger.info("user_role_id ok")
            # Set permissions for that library. The following settings enable
            # the upload of input data by the user to this library.
            self.library_client.set_library_permissions(
                library_id=new_library['id'],
                access_in=user_role_id,
                modify_in=user_role_id,
                add_in=user_role_id,
                manage_in=user_role_id)
            return True
        except Exception:
            logger.error("Cannot create Galaxy data library")
            return False

    def create_history(self, name):
        '''
        Create a history and return the history id.
        '''
        history_dict = self.history_client.create_history(name)
        return history_dict['id']

    def create_folder(self, library_name, user_mail):
        '''
        Create a folder for the files in a library. This is used to store files
        for a Galaxy library. Return a tuple containing the library id and the
        folder id.
        '''
        # Assume that there is just one library with this name
        library = self.library_client.get_libraries(library_id=None,
                                                    name=library_name,
                                                    deleted=False)[0]
        folder = self.library_client.create_folder(library['id'], user_mail)
        return library['id'], folder[0]['id']

    def upload_workflow_input(self, workflow_input, library_id, folder_id,
                              mount_input_dir=True, input_dir=None):
        '''
        Upload the input data for a workflow to Galaxy. The files are uploaded
        from the filesystem to a folder of a Galaxy library. The files are not
        duplicated, because only symbolic links are created. If a user provides
        their own data, the files are 'uploaded' from the /input directory,
        which is just a mount point for a directory outside the container. If a
        user wants to use test data provided with the container, mount_input_dir
        is False and the directory inside the container has to be specified.
        '''
        for step_uuid, step_param in workflow_input.iteritems():
            if step_param['step_type'] == 'data_input':
                if mount_input_dir:
                    # Input data is mounted into the container
                    path = os.path.join('/input', step_param['filename'])
                else:
                    # input_dir exists inside the container (e.g. workflow test data)
                    path = os.path.join(input_dir, step_param['filename'])
                logger.info("Next upload: " + path)
                workflow_input[step_uuid]['dataset_id'] = \
                    self.library_client.upload_from_galaxy_filesystem(
                        library_id, path,
                        folder_id=folder_id,
                        file_type=step_param['galaxy_file_type'],
                        link_data_only='link_to_files')

    def export_output_history(self, history_id, output_dir):
        '''
        Export all datasets of a history to the output directory.
        '''
        # Get a list of all datasets in the output history
        history_datasets = self.history_client.show_history(history_id,
                                                            contents=True,
                                                            deleted=None,
                                                            visible=None,
                                                            details=None,
                                                            types=None)
        # Iterate over the datasets of the history and download each dataset
        # that is in 'ok' state (i.e. the corresponding tool completed).
        for dataset in history_datasets:
            if dataset['state'] == 'ok':
                logger.info("Download dataset %s, state: %s", dataset['name'], dataset['state'])
                self.dataset_client.download_dataset(dataset['id'],
                                                     file_path=output_dir,
                                                     use_default_filename=True,
                                                     wait_for_completion=False,
                                                     maxwait=12000)
            else:
                logger.info("Do not download dataset %s, state: %s", dataset['name'], dataset['state'])
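# A minimal usage sketch for GalaxyHandler, assuming the container ships Galaxy
# under /galaxy as in the class above. The URL, API key, container path, bind
# paths, and the workflow_input layout below are illustrative placeholders, not
# values taken from the original code.
handler = GalaxyHandler(url="http://localhost:8080",
                        api_key="admin-api-key",        # placeholder
                        container_file="galaxy.simg")   # placeholder Singularity image

handler.start_container_galaxy(binds="/data/input:/input,/data/output:/output")
if not handler.initialize():
    raise SystemExit("Could not connect to the containerized Galaxy")

# Stage workflow inputs into a library folder
handler.create_input_library("workflow_inputs", user=None)
library_id, folder_id = handler.create_folder("workflow_inputs", "user@example.org")
workflow_input = {
    "step-uuid-1": {"step_type": "data_input",
                    "filename": "reads.fastq",
                    "galaxy_file_type": "fastqsanger"},
}
handler.upload_workflow_input(workflow_input, library_id, folder_id)

history_id = handler.create_history("analysis")
# ... run the workflow into history_id ...
handler.export_output_history(history_id, "/output")
handler.stop_container_galaxy(bind_dirs="/data/input:/input,/data/output:/output")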
#!/usr/bin/python
import galaxy_key

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.histories import HistoryClient

# Create a module called galaxy_key that defines galaxy_host and galaxy_key
gi = GalaxyInstance(url=galaxy_key.galaxy_host, key=galaxy_key.galaxy_key)
hc = HistoryClient(gi)

# Use the first history returned for this user
my_history = hc.get_histories()[0]
my_history_id = my_history['id']

# Find a dataset named 'sum_vector' in that history and print its provenance
dataset = hc.show_matching_datasets(my_history_id, 'sum_vector')[0]
dataset_provenance = hc.show_dataset_provenance(my_history_id, dataset['id'])
print(dataset_provenance)
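# A small follow-up sketch: the provenance record above can also be fetched with
# follow=True, which asks Galaxy to recurse into the input datasets. The
# 'tool_id' and 'parameters' keys below reflect the usual provenance payload;
# treat them as assumptions and fall back to the raw dict if they are missing.
import pprint

provenance = hc.show_dataset_provenance(my_history_id, dataset['id'], follow=True)
print('Dataset %s was produced by tool %s' % (dataset['id'],
                                              provenance.get('tool_id', 'unknown')))
pprint.pprint(provenance.get('parameters', provenance))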
def main():
    parser = OptionParser()
    parser.add_option("-A", "--auth-file", dest="auth_filename",
                      help="JSON file with Galaxy host and key", metavar="FILE")
    parser.add_option("-f", "--uuid-file", dest="uuids_filename",
                      help="TSV file with list of UUIDs to import. "
                           "The first row is assumed to be a header",
                      metavar="FILE")
    parser.add_option("-H", "--target-history", dest="target_history",
                      help="Target history name in Galaxy to copy datasets into",
                      metavar="HISTORY_NAME")
    (options, args) = parser.parse_args()

    if not options.auth_filename:
        print_error_and_exit('Authentication file not provided')
    # if not options.uuids_filename:
    #     print_error_and_exit('TSV file with UUIDs not provided')
    if not options.target_history:
        print_error_and_exit('Galaxy history name where datasets will be imported not provided')

    # Read authentication info
    galaxy_host, galaxy_key = parse_auth_file(options.auth_filename)
    gi = GalaxyInstance(url=galaxy_host, key=galaxy_key)
    history_client = HistoryClient(gi)
    library_client = LibraryClient(gi)
    folder_client = FoldersClient(gi)

    # Read the UUIDs file; fall back to stdin if none was provided
    if options.uuids_filename:
        try:
            uuids_fd = open(options.uuids_filename, 'rb')
        except IOError:
            print_error_and_exit('Could not open TSV file with UUIDs ' + options.uuids_filename)
    else:
        uuids_fd = sys.stdin
    queried_ds_uuid_dict = parse_TSV_file(uuids_fd)

    # Search for datasets
    find_datasets_by_uuids_in_histories(gi, history_client, queried_ds_uuid_dict)
    find_datasets_by_uuids_in_libraries(gi, library_client, queried_ds_uuid_dict)
    dataset_info_list = queried_ds_uuid_dict.values()

    # Validate datasets, discard repeats
    validate_queried_dataset_info(dataset_info_list)

    # Get or create the target history
    target_history_id = get_or_create_history_id(gi, history_client, options.target_history)

    # Copy datasets from libraries to the target history
    copy_from_lib(gi, history_client, dataset_info_list, target_history_id=target_history_id)

    # Copy from history to /tmp and back - not used anymore
    # copy_to_tmp_lib_and_back(gi, library_client, history_client, folder_client, '/tmp',
    #                          dataset_info_list, target_history_id=target_history_id)

    # Copy history datasets from other histories
    copy_other_history_datasets(gi, history_client, dataset_info_list,
                                target_history_id=target_history_id)

    # Create dataset collections
    create_dataset_collections(gi, history_client, dataset_info_list,
                               target_history_id=target_history_id)
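# main() above relies on a parse_auth_file() helper that is not shown here.
# A minimal sketch, assuming the JSON auth file uses the keys "galaxy_host" and
# "galaxy_key" (both key names are assumptions, not confirmed by the original code):
import json

def parse_auth_file(auth_filename):
    # Read {"galaxy_host": "...", "galaxy_key": "..."} and return both values
    with open(auth_filename) as auth_fd:
        auth = json.load(auth_fd)
    return auth['galaxy_host'], auth['galaxy_key']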
def get_history_status(user, hist_id=None):
    # Go through every Galaxy instance registered for this user
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)

    # Loop through the instances
    status = []
    for git in gits:
        # Loop through the histories of that instance
        gi, gu = get_gi_gu(user, git)
        hc = HistoryClient(gi)
        hists = hc.get_histories()

        # Create a list of dictionaries for our django table
        for hist in hists:
            sd = {}
            if hist_id and hist['id'] != hist_id:
                continue
            history_info = hc.show_history(hist['id'])

            # Add status info
            sd_bioblend = hc.get_status(hist['id'])
            state_details = sd_bioblend['state_details']
            sd.update(state_details)
            sd['estimated_progress'] = sd_bioblend['percent_complete']

            datetime_object = datetime.strptime(history_info['update_time'],
                                                '%Y-%m-%dT%H:%M:%S.%f')
            sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
            sd['update_time_unix'] = unixtime(datetime_object)
            sd['galaxy_instance'] = git.name
            sd['name'] = hist['name']

            # Update the matching History model object, or create a new one
            hsq = History.objects.filter(galaxy_id=hist['id'], galaxyinstancetracking=git)
            if hsq:
                hs = hsq[0]
                hs.name = hist['name']
                hs.update_time = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                hs.empty = state_details['empty']
                hs.error = state_details['error']
                hs.failed_metadata = state_details['failed_metadata']
                hs.new = state_details['new']
                hs.ok = state_details['ok']
                hs.paused = state_details['paused']
                hs.running = state_details['running']
                hs.queued = state_details['queued']
                hs.setting_metadata = state_details['setting_metadata']
                hs.upload = state_details['upload']
                hs.estimated_progress = sd_bioblend['percent_complete']
            else:
                hs = History(
                    galaxyinstancetracking=git,
                    name=hist['name'],
                    update_time=datetime_object.strftime('%Y-%m-%d %H:%M:%S'),
                    empty=state_details['empty'],
                    error=state_details['error'],
                    failed_metadata=state_details['failed_metadata'],
                    new=state_details['new'],
                    ok=state_details['ok'],
                    paused=state_details['paused'],
                    running=state_details['running'],
                    queued=state_details['queued'],
                    setting_metadata=state_details['setting_metadata'],
                    upload=state_details['upload'],
                    galaxy_id=hist['id'],
                    estimated_progress=sd_bioblend['percent_complete'])
            hs.save()
            sd['history_data_bioblend_list'] = '/galaxy/history_data_bioblend_list/{}'.format(hs.pk)
            status.append(sd)

    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
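# get_history_status() calls a unixtime() helper that is not defined in this
# snippet. A minimal sketch, assuming it simply converts a naive datetime to
# seconds since the epoch for sorting (the name and behavior are assumptions):
import time

def unixtime(datetime_object):
    # Convert a datetime to epoch seconds so histories can be sorted by recency
    return time.mktime(datetime_object.timetuple())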
def get(datasets_identifiers, identifier_type='hid', history_id=None, retrieve_datatype=None):
    """
    Given the history_id that is displayed to the user, this function will either
    search for matching files in the history (if identifier_type is set to 'regex')
    or directly download the file[s] from the history and store them under /import/.
    Return value[s] are the path[s] to the dataset[s] stored under /import/.
    """
    history_id = history_id or os.environ['HISTORY_ID']
    # The object-oriented bioblend API is too slow at retrieving all datasets
    # from a history, so fall back to the non-object path.
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    file_path_all = []
    datatypes_all = []

    if type(datasets_identifiers) is not list:
        datasets_identifiers = [datasets_identifiers]
    if identifier_type == "regex":
        datasets_identifiers = find_matching_history_ids(datasets_identifiers)
        identifier_type = "hid"

    for dataset_id in datasets_identifiers:
        file_path = '/import/%s' % dataset_id
        log.debug('Downloading gx=%s history=%s dataset=%s', gi, history_id, dataset_id)
        # Cache the file requests. E.g. if someone does something silly like a
        # get() for a Galaxy file in a for-loop, we wouldn't want to re-download
        # every time and add that overhead.
        if not os.path.exists(file_path):
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datasets = {ds[identifier_type]: ds['id'] for ds in history}
            if retrieve_datatype:
                datatypes_all.append({ds[identifier_type]: ds['extension'] for ds in history})
            if identifier_type == 'hid':
                dataset_id = int(dataset_id)
            dc.download_dataset(datasets[dataset_id], file_path=file_path,
                                use_default_filename=False)
        else:
            hc = HistoryClient(gi)
            dc = DatasetClient(gi)
            history = hc.show_history(history_id, contents=True)
            datatypes_all.append({ds[identifier_type]: ds['extension'] for ds in history})
            log.debug('Cached, not re-downloading')
        file_path_all.append(file_path)

    # Return the first path if only one item was given, otherwise all paths.
    # This should not break backwards compatibility.
    if retrieve_datatype:
        if len(file_path_all) == 1:
            dataset_number = int(file_path_all[0].strip().split("/")[-1])
            return file_path_all, datatypes_all[0][dataset_number]
        else:
            datatype_multi = dict()
            for i in file_path_all:
                dataset_number = int(i.strip().split("/")[-1])
                datatype_multi[dataset_number] = datatypes_all[0][dataset_number]
            return file_path_all, datatype_multi
    else:
        return file_path_all[0] if len(file_path_all) == 1 else file_path_all
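# get() delegates the 'regex' case to find_matching_history_ids(), which is not
# shown here. A minimal sketch, assuming it matches each pattern against the
# dataset names in the current history and returns the matching hids; the exact
# behavior of the original helper is an assumption.
import os
import re

def find_matching_history_ids(patterns, history_id=None):
    history_id = history_id or os.environ['HISTORY_ID']
    gi = get_galaxy_connection(history_id=history_id, obj=False)
    hc = HistoryClient(gi)
    matching_hids = []
    # Keep any dataset whose name matches at least one of the given patterns
    for ds in hc.show_history(history_id, contents=True):
        if any(re.search(pattern, ds['name']) for pattern in patterns):
            matching_hids.append(ds['hid'])
    return matching_hids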