# Imports needed by this script; GALAXY_HOME must point at the Galaxy root.
import argparse
from os import environ, listdir
from os.path import isfile, join

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.workflows import WorkflowClient


def main():  # was misnamed __main__(), which is never called automatically
    parser = argparse.ArgumentParser(description='Import workflows from a local directory')
    parser.add_argument('-p', '--port',
                        help='port number of your Docker container mapped to the host machine (default: 80)',
                        default='80')
    parser.add_argument('-k', '--key', help='user API key')
    args = parser.parse_args()

    # Galaxy client instance
    galaxy_home = environ['GALAXY_HOME']
    galaxy_client = GalaxyInstance(url='http://127.0.0.1:' + args.port, key=args.key)

    # Workflow client instance: import every workflow file found in my_workflows
    workflow_client = WorkflowClient(galaxy_client)
    my_workflows_dir = galaxy_home + '/my_workflows'
    for f in listdir(my_workflows_dir):
        f_path = join(my_workflows_dir, f)
        if isfile(f_path):
            workflow_client.import_workflow_from_local_path(f_path)
            print('Imported workflow: ' + f)
def main():
    # GALAXY_URL, API_KEY, WORKFLOW_ID and TOOL_ID_IN_GALAXY are module-level
    # constants; downloadDataset and findDatasedIdByExtention are helpers
    # defined elsewhere in this script.
    try:
        input_path = sys.argv[1]
        output_path = sys.argv[2]
    except IndexError:
        print('usage: %s input_path output_path' % os.path.basename(sys.argv[0]))
        sys.exit(1)

    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    historyClient = HistoryClient(galaxyInstance)
    toolClient = ToolClient(galaxyInstance)
    workflowClient = WorkflowClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    history = historyClient.create_history('tmp')
    uploadedFile = toolClient.upload_file(input_path, history['id'])
    workflow = workflowClient.show_workflow(WORKFLOW_ID)
    # next(iter(...)) instead of .keys()[0], which fails on Python 3
    dataset_map = {next(iter(workflow['inputs'])):
                   {'id': uploadedFile['outputs'][0]['id'], 'src': 'hda'}}
    params = {TOOL_ID_IN_GALAXY: {'param': 'reference_genome', 'value': 'hg19'}}
    # Note: run_workflow is deprecated in newer bioblend releases in favour of
    # invoke_workflow.
    output = workflowClient.run_workflow(WORKFLOW_ID, dataset_map, params, history['id'])
    downloadDataset(datasetClient,
                    findDatasedIdByExtention(datasetClient, output, 'bed'),
                    output_path)
    # delete the temporary history
    historyClient.delete_history(history['id'])
    # if the Galaxy instance supports dataset purging:
    # historyClient.delete_history(history['id'], True)
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('_')[-1].split('.')[0]
        if replicate == '1':
            itemp += 1
            # only process library items 71 through 92 in this batch
            if not (71 <= itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            # find the matching reverse read in the same library
            files2 = libraryClient.show_library(brassica_library[0]['id'], contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = (f['name'].split('_')[7] + '_' +
                                f['name'].split('_')[-3] + '_' +
                                f['name'].split('_')[-2])
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(hist['id'], reverse_id)
            datamap = {
                '0': {'src': 'hda', 'id': dataset_F['id']},
                '1': {'src': 'hda', 'id': dataset_R['id']},
            }
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
def check_galaxy(api_key, galaxy_url):
    gi = GalaxyInstance(galaxy_url, key=api_key)
    gi.verify = False  # skip SSL certificate verification
    wc = WorkflowClient(gi)
    try:
        wc.get_workflows()
    except ConnectionError:
        raise forms.ValidationError(
            'Something is wrong with Galaxy connection, please check')
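# A plausible (assumed) way to wire check_galaxy() into a Django form, since it
# raises forms.ValidationError; the form and field names here are hypothetical.
from django import forms

class GalaxyConnectionForm(forms.Form):
    galaxy_url = forms.CharField()
    api_key = forms.CharField()

    def clean(self):
        cleaned = super().clean()
        check_galaxy(cleaned.get('api_key'), cleaned.get('galaxy_url'))
        return cleaned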
def main():
    # wf_dname, repo_name, makeDir and get_all_ga are defined elsewhere in this script.
    try:
        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> get current workflow list...')
        gInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        wClient = WorkflowClient(gInstance)
        dataset = wClient.get_workflows()
        wf_namelist = [x['name'] for x in dataset if not x['deleted']]
        print(wf_namelist)

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> clone BiT Workflows from GitHub...')
        if not os.path.exists(wf_dname + '/' + repo_name):
            makeDir(wf_dname)
            os.chdir(wf_dname)
            git_url = 'https://github.com/myoshimura080822/' + repo_name + '.git'
            Repo.clone_from(git_url, repo_name)
        else:
            print(repo_name + ' already cloned. To update, please delete, move or '
                  'rename the dir before executing this script.')
            return 0

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> delete and import workflow files...')
        mytoolsdir = wf_dname + '/' + repo_name + '/'
        clone_wf_list = [f.replace(mytoolsdir, '') for f in get_all_ga(mytoolsdir)]
        print(clone_wf_list)

        # names of existing workflows that match the cloned .ga files
        delete_itm = [y for x in clone_wf_list
                      for y in wf_namelist if y.find(x.replace('.ga', '')) > -1]
        print(delete_itm)
        id_list = [x['id'] for y in delete_itm
                   for x in dataset if x['name'].find(y) > -1]
        print(id_list)
        for wf_id in id_list:
            wClient.delete_workflow(wf_id)
        print(wClient.get_workflows())

        for wf_file in get_all_ga(mytoolsdir):
            wClient.import_workflow_from_local_path(wf_file)
        print(wClient.get_workflows())

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> script ended :)')
        return 0
    except Exception:
        info = sys.exc_info()
        tbinfo = traceback.format_tb(info[2])
        print('Error Info...'.ljust(80, '='))
        for tbi in tbinfo:
            print(tbi)
        print('  %s' % str(info[1]))
        print('\n'.rjust(85, '='))
        sys.exit(1)
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'], contents=True)
    for f in files:
        if f['type'] == 'folder':
            continue  # skip folders, try the next entry
        replicate = f['name'].split('/')[-1][0]
        if replicate == 'X':
            base = f['name'].split('/')[-1].split('.')[0]
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)
            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(hist['id'], forward_id)
            datamap = {'0': {'src': 'hda', 'id': dataset_F['id']}}
            workflows = workflowsClient.get_workflows(name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'], datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
def create_clients(self):
    '''Create bioblend clients for the Galaxy instance.'''
    # Create the first client and check that the API key works
    self.config_client = ConfigClient(self.instance)
    try:
        self.config_client.get_version()
        self.config_client.get_config()
    except Exception:
        logger.error("Provided API key does not work.")
        return False
    try:
        self.user_client = UserClient(self.instance)
        self.workflow_client = WorkflowClient(self.instance)
        self.tool_client = ToolClient(self.instance)
        self.toolshed_client = ToolShedClient(self.instance)
        self.library_client = LibraryClient(self.instance)
        self.roles_client = RolesClient(self.instance)
        self.history_client = HistoryClient(self.instance)
        self.dataset_client = DatasetClient(self.instance)
    except Exception:
        logger.error("Error initializing other bioblend clients.")
        return False
    return True
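# A minimal usage sketch for create_clients() above (names assumed, not from the
# source): the method expects an object that already holds a GalaxyInstance in
# self.instance.
class GalaxyWrapper:
    def __init__(self, url, api_key):
        self.instance = GalaxyInstance(url, key=api_key)

    create_clients = create_clients  # bind the function above as a method

# wrapper = GalaxyWrapper('https://usegalaxy.org', 'my-api-key')
# if wrapper.create_clients():
#     print(wrapper.config_client.get_version())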
def workflow_sync(user):
    # get all Galaxy instances for that user
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    dj_wfs = Workflow.objects.all()

    # loop through instances
    all_wfs = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        wfs = wc.get_workflows()
        all_wfs.extend(wfs)
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            wjson = wc.export_workflow_dict(wf['id'])
            # check if the workflow id is already tracked in Django
            dj_wf = dj_wfs.filter(galaxy_id=wfd['id'])
            if dj_wf:
                if not dj_wf[0].latest_workflow_uuid == wf['latest_workflow_uuid']:
                    dj_wf_update = dj_wf[0]
                    dj_wf_update.latest_workflow_uuid = wf['latest_workflow_uuid']
                    dj_wf_update.name = wf['name']
                    dj_wf_update.workflowjson = wjson
                    dj_wf_update.save()
            else:
                workflow = Workflow(galaxy_id=wf['id'],
                                    name=wf['name'],
                                    description='added automatically',
                                    galaxyinstancetracking=git,
                                    added_by=user,
                                    workflowjson=wjson)
                workflow.save()

    # flag workflows that are no longer accessible on any instance
    all_wfs_gi = [w['id'] for w in all_wfs]
    Workflow.objects.exclude(galaxy_id__in=all_wfs_gi).update(accessible=False)
def run_galaxy_workflow(wid, user, git, pkd, l, history_name, library):
    gi, gu = get_gi_gu(user, git)
    st = get_time_stamp()
    workflow_input_d = get_workflow_inputs(l, pkd, gi, git, history_name, library)
    wc = WorkflowClient(gi)
    print(workflow_input_d)
    workflow = Workflow.objects.get(id=wid)
    wf = wc.get_workflows(workflow_id=workflow.galaxy_id)[0]
    wfi = wc.invoke_workflow(wf['id'],
                             inputs=workflow_input_d,
                             import_inputs_to_history=True,
                             history_name='{}_({})'.format(history_name, st))
    return wfi
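# A hypothetical call to run_galaxy_workflow() above; the model ids and
# parameter dict are placeholders, not values from the source.
# wfi = run_galaxy_workflow(wid=1, user=request.user, git=git_instance,
#                           pkd=param_dict, l=input_files,
#                           history_name='rnaseq_run', library=False)
# print(wfi['id'])  # the returned dict describes the new invocation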
def get_galaxy_workflow_inputs(w, user):
    wf = w.workflowfile
    git = w.galaxyinstancetracking
    wf_name = w.name
    api_key = GalaxyUser.objects.get(internal_user=user,
                                     galaxyinstancetracking=git).api_key
    galaxy_url = git.url
    gi = GalaxyInstance(galaxy_url, key=api_key)
    gi.verify = False
    wc = WorkflowClient(gi)

    # tag the uploaded copy of the workflow with the upload date
    wfd = wf.read()
    jsonload = json.loads(wfd)
    now = datetime.datetime.now()
    jsonload['name'] = '{} dj-upload[{} {}]'.format(jsonload['name'], wf_name,
                                                    now.strftime("%Y-%m-%d"))
    wfimp = wc.import_workflow_json(jsonload)
    return check_workflow_data_inputs(wfimp['id'], wc), wfimp['id']
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    # download the first dataset matching name_filter from every history
    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(hist_id,
                                                              name_filter=name_filter)
        if len(countSecondary) != 0:
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            datasetClient.download_dataset(countSecondary[0]['id'],
                                           file_path=file_path,
                                           use_default_filename=False)
    sys.exit()
def get_workflow_status(user):
    # go through every Galaxy instance for this user
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)
    status = []
    for git in gits:
        # loop through workflows for that instance
        gi, gu = get_gi_gu(user, git)
        wc = WorkflowClient(gi)
        hc = HistoryClient(gi)
        wfs = wc.get_workflows()
        for wf in wfs:
            wfd = wc.show_workflow(wf['id'])
            winvoke = wc.get_invocations(wf['id'])
            for wi in winvoke:
                wid = wc.show_invocation(wf['id'], wi['id'])
                h_l = hc.get_histories(wid['history_id'], deleted=True)
                if h_l:
                    h = h_l[0]
                else:
                    continue
                sd = get_status_d(wid)
                sd['name'] = wfd['name']
                hd = hc.show_history(h['id'])
                sd['history_name'] = h['name']
                datetime_object = datetime.strptime(hd['update_time'],
                                                    '%Y-%m-%dT%H:%M:%S.%f')
                sd['update_time'] = datetime_object.strftime('%Y-%m-%d %H:%M:%S')
                sd['update_time_unix'] = unixtime(datetime_object)
                sd['galaxy_instance'] = git.name
                status.append(sd)
    status = sorted(status, key=lambda k: k['update_time_unix'], reverse=True)
    return status
def main():
    try:
        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> get current workflow list...')
        gInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
        wClient = WorkflowClient(gInstance)
        dataset = wClient.get_workflows()
        wf_namelist = [x['name'] for x in dataset if not x['deleted']]
        print(wf_namelist)

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> clone BiT Workflows from GitHub...')
        if not os.path.exists(wf_dname + '/galaxy-workflow_rnaseq'):
            makeDir(wf_dname)
            os.chdir(wf_dname)
            git_url = 'https://github.com/myoshimura080822/galaxy-workflow_rnaseq.git'
            Repo.clone_from(git_url, 'galaxy-workflow_rnaseq')
        else:
            print('BiT Workflow already cloned. To update, please delete, move or '
                  'rename "/galaxy-workflow_rnaseq" before executing this script.')
            return 0

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> delete and import workflow files...')
        mytoolsdir = wf_dname + '/galaxy-workflow_rnaseq/'
        clone_wf_list = [f.replace(mytoolsdir, '') for f in get_all_ga(mytoolsdir)]
        print(clone_wf_list)

        # names of existing workflows that match the cloned .ga files
        delete_itm = [y for x in clone_wf_list
                      for y in wf_namelist if y.find(x.replace('.ga', '')) > -1]
        print(delete_itm)
        id_list = [x['id'] for y in delete_itm
                   for x in dataset if x['name'].find(y) > -1]
        print(id_list)
        for wf_id in id_list:
            wClient.delete_workflow(wf_id)
        print(wClient.get_workflows())

        for wf_file in get_all_ga(mytoolsdir):
            wClient.import_workflow_from_local_path(wf_file)
        print(wClient.get_workflows())

        print(':::::::::::::::::::::::::::::::::::::::::::')
        print('>>>>>>>>>>>>>>>>> script ended :)')
        return 0
    except Exception:
        info = sys.exc_info()
        tbinfo = traceback.format_tb(info[2])
        print('Error Info...'.ljust(80, '='))
        for tbi in tbinfo:
            print(tbi)
        print('  %s' % str(info[1]))
        print('\n'.rjust(85, '='))
        sys.exit(1)
else: print "You passed %s I need a .ini file" %(sys.argv[1],) sys.exit(1) else: parser.read('configuration.ini') api_key = get_api_key(parser.get('Globals', 'api_file')) galaxy_host = parser.get('Globals', 'galaxy_host') file_name_re = re.compile(parser.get('Globals', 'sample_re')) galaxyInstance = GalaxyInstance(galaxy_host, key=api_key) historyClient = HistoryClient(galaxyInstance) toolClient = ToolClient(galaxyInstance) workflowClient = WorkflowClient(galaxyInstance) dataSetClient = DatasetClient(galaxyInstance) files = get_files(parser.get('Globals','fastq_dir')) if len(files) == 0: print "Not able to find any fastq files looked in %s" %(parser.get('Globals', 'fastq_dir')) else: print "Found fastq files running workflow for the following files (R2's will be added)" print ",".join(files) files_to_keep = {} for R1 in files: input_dir_path = os.path.dirname(R1)+"/" R2 = R1.replace('R1','R2') if not os.path.exists(R1): print "%s File Not Found" % (R1, ) raise Exception
parser = SafeConfigParser()
if len(sys.argv) >= 2:
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print("You passed %s I need a .ini file" % (sys.argv[1],))
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')

galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
libraryInstance = LibraryClient(galaxyInstance)

libs = libraryInstance.get_libraries(name=parser.get('Globals', 'default_lib'))
details = libraryInstance.get_folders(library_id=libs[0]['id'])
folder = libraryInstance.show_library(library_id=libs[0]['id'], contents=True)
for f in folder[1:]:
    print("%s:%s" % (f['name'], f['id']))

workflow = WorkflowClient(galaxyInstance)
wf = workflow.get_workflows()
for w in wf:
    print(w['id'])
    print(w['name'])
    print(w['owner'])
def runWorkflow(argDictionary, comparisons, samples):
    # Helper functions such as getNumberNotComplete, getNumberComplete,
    # getDatasetsByApproxName, getDatasetsByName, getMostRecentDatasetByName,
    # makeDataSetCollection, getFoldChangeData, getRowFromCsv, getUrl and
    # getLibraryToolDataID are defined elsewhere in this module.
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    import time
    import csv

    api_key = ''      # fill in
    galaxy_host = ''  # fill in
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    def lib_data(name):
        # shorthand for the repeated library-data lookups below
        return getLibraryToolDataID(history=history, history_client=history_client,
                                    library_client=library_client, name=name)

    history = history_client.create_history(argDictionary['accessionNumber'])
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')

    if argDictionary['site'] == "ENA":
        # fastqs available on ENA
        tool_inputs = {
            "accessionNumber": argDictionary["ENA"],
            "sampleTable": {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
        }
        # run the tool to get the data from ENA
        tool_client.run_tool(history['id'], 'getRNASeqExpressionData', tool_inputs)
        # we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
        # sleep until all the fastq files are findable
        time.sleep(120)
        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history, history_client)[0]
        fileList = history_client.download_dataset(history["id"], fileList["id"], dirpath)
        num_lines = sum(1 for line in open(fileList)) - 1
        datasets = list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else:
        # for SRA
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print(sample)
                    fileNames = str.split(sample["File"], "|")
                    for fileName in fileNames:
                        tool_inputs = {
                            "input|input_select": "accession_number",
                            "outputformat": "fastqsanger.gz",
                            "input|accession": fileName
                        }
                        # run the tool to get the single-end data from SRA
                        tool_client.run_tool(history['id'],
                                             'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3',
                                             tool_inputs)
        else:
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    tool_inputs = {"accession_number": sample["File"]}
                    # run the tool to get the paired-end data from SRA
                    tool_client.run_tool(history['id'],
                                         'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4',
                                         tool_inputs)
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
        datasets = getDatasetsByApproxName("fastq", history, history_client)

    # run FastQC on each fastq dataset
    for fastq in datasets:
        try:
            tool_inputs = {'input_file': {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'],
                                 'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69',
                                 tool_inputs)
        except Exception:
            pass
    # wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # make dataset collections for quantification using the fastq files
    collections = list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets = list()
            fileNames = str.split(row["File"], "|")
            for fileName in fileNames:
                datasets = datasets + getDatasetsByApproxName(fileName, history,
                                                              history_client)
            # make a list of datasets per sample
            collections.append(makeDataSetCollection(datasets, row["Sample"],
                                                     history, history_client))

    # get the correct kallisto index
    species = argDictionary['species'].lower()
    index = lib_data(species + "IndexFile")
    index = {'id': index, 'src': 'hda'}

    # run kallisto for every dataset collection
    for collection in collections:
        tool_inputs = {'index': index,
                       'inputs': {'id': collection['id'], 'src': 'hdca'},
                       "single": argDictionary["single"],
                       "stranded": argDictionary["stranded"]}
        # often encounter a "connection broken" error - possible problem with
        # the server? bypass by ignoring the exception
        tool_client.run_tool(history['id'], 'kallistoQuant', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # run MultiQC on kallisto logs and FastQC files
    datasets = getDatasetsByApproxName("RawData", history, history_client)
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)
    tool_inputs = {}
    for i, dataset in enumerate(datasets + kallistoLogs):
        if not dataset["deleted"]:
            software = 'fastqc' if dataset in datasets else 'kallisto'
            params = {'id': dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software,
                                'results_%s|input_file' % i: params})
    # summarise with the MultiQC tool
    tool_client.run_tool(history['id'], 'multiqc', tool_inputs)
    multiQc = getDatasetsByApproxName("multiqc", history, history_client)[0]

    # get all the abundance files to convert to a gene-level counts matrix
    datasets = getDatasetsByApproxName(".abundance", history, history_client)
    # make a dataset collection from which to build the counts matrix
    collection = makeDataSetCollection(datasets, "abundances", history, history_client)
    tool_inputs = {'inputs': {'id': collection['id'], 'src': 'hdca'},
                   "species": argDictionary['species']}
    # convert abundances to a gene-level counts matrix
    tool_client.run_tool(history['id'], 'KallistoAbundancestoGeneCountMatrix', tool_inputs)
    # a dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    txi = getDatasetsByApproxName("txi", history, history_client)
    # set up the tool_inputs for PCA
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    tool_client.run_tool(history['id'], 'PCARNASeq', tool_inputs)
    pca = getDatasetsByApproxName("PCA", history, history_client)[0]

    # set up the tool_inputs for DESeq2 and characteristic direction
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   'comparisonsTable': {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'},
                   "foldChangeOnly": argDictionary['foldChangeOnly'],
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    tool_client.run_tool(history['id'], 'DESeq2FoldChange', tool_inputs)
    tool_client.run_tool(history['id'], 'characteristicDirectionRNASeq', tool_inputs)
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    # get the fold-change data, cut it per comparison and run the pathway workflow
    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(pca["id"]),
                          'chrDirTable': getUrl(getMostRecentDatasetByName(
                              'chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = lib_data("ratStringNetwork")
            geneLengths = lib_data("ratGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Cow":
            network = lib_data("cowStringNetwork")
            geneLengths = lib_data("cowGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        if argDictionary['species'] == "Horse":
            network = lib_data("horseStringNetwork")
            geneLengths = lib_data("horseGeneLengths")
            homology = lib_data("Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = lib_data("pigStringNetwork.txt")
            geneLengths = lib_data("pigGeneLengths.tabular")
            homology = lib_data("Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = lib_data("zebrafishStringNetwork")
            geneLengths = lib_data("zebrafishGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable',
                'comparisonNum', 'comparisonDenom', 'foldChange', 'interactome',
                'exonLength', 'moduleNodes', 'modulePlots', 'slimEnrichPathways',
                'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic',
                'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Mouse":
            network = lib_data("mouseStringNetwork")
            geneLengths = lib_data("MouseGeneLengths.tab")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-mouse.txt")
        else:
            network = lib_data("humanStringNetwork")
            geneLengths = lib_data("geneLengths")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-human.txt")
        pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                          '3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular', history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular', history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular', history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular', history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'multiQC', 'factor', 'PCA', 'chrDirTable',
                'comparisonNum', 'comparisonDenom', 'foldChange', 'interactome',
                'exonLength', 'moduleNodes', 'modulePlots', 'pathways',
                'enrichPlot', 'enrichmentTable', 'slimEnrichPathways',
                'secretedProteins', 'enrichedDrugsReverse', 'enrichedDrugsMimic',
                'enrichedTerms', 'enrichedTerms.reduced', 'GO.MDS']
        outFileName = 'output/' + argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        with open(outFileName, 'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    import csv

    api_key = ''  # fill in
    galaxy_host = 'http://localhost:8080/'
    gi = GalaxyInstance(url=galaxy_host, key=api_key)
    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)

    def lib_data(name):
        # shorthand for the repeated library-data lookups below
        return getLibraryToolDataID(history=history, history_client=history_client,
                                    library_client=library_client, name=name)

    # was create_history(row['accessionNumber']); `row` is undefined here
    history = history_client.create_history(argDictionary['accessionNumber'])

    # import the Galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')
    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')

    # run the workflow on the csv data to create a new history
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary
    datamap = {'1': {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}
    workflow_client.invoke_workflow(workflow['id'], inputs=datamap,
                                    history_id=history['id'], params=params)
    # a dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    dataset_id = getFoldChangeData(history, history_client)['id']
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]

    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1
    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable': {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber': comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)

    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        if argDictionary['species'] == "Rat":
            network = lib_data("ratStringNetwork")
            geneLengths = lib_data("ratGeneLengths")
            homology = lib_data("Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network = lib_data("cowStringNetwork")
            geneLengths = lib_data("cowGeneLengths")
            homology = lib_data("Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network = lib_data("horseStringNetwork")
            geneLengths = lib_data("horseGeneLengths")
            homology = lib_data("Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network = lib_data("pigStringNetwork.txt")
            geneLengths = lib_data("pigGeneLengths.tabular")
            homology = lib_data("Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network = lib_data("zebrafishStringNetwork")
            geneLengths = lib_data("zebrafishGeneLengths")
            homology = lib_data("HOM_AllOrganism.rpt")
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA',
                'chrDirTable', 'foldChange', 'interactome', 'exonLength',
                'moduleNodes', 'modulePlots', 'enrichmentTable',
                'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv',
                  'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
    else:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        if argDictionary['species'] == "Mouse":
            network = lib_data("mouseStringNetwork")
            geneLengths = lib_data("MouseGeneLengths.tab")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-mouse.txt")
        else:
            network = lib_data("humanStringNetwork")
            geneLengths = lib_data("geneLengths")
            homology = lib_data("Homology.mouse.txt")
            secretedReference = lib_data("uniprot-secreted-human.txt")
        pathwayDatamap = {'4': {'id': secretedReference, 'src': 'hda'},
                          '3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf', history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular', history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png', history, history_client)['id'])
            return_collection.append(return_dict)

        # hard-code keys to define the column order
        keys = ['accessionNo', 'factor', 'comparisonNum', 'comparisonDenom', 'PCA',
                'chrDirTable', 'foldChange', 'interactome', 'exonLength',
                'moduleNodes', 'modulePlots', 'pathways', 'enrichPlot',
                'enrichmentTable', 'slimEnrichmentPathways', 'slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv',
                  'w', newline='') as csvFile:
            # headers come from the key list; the first dict doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
        return return_collection
__version__ = '0.1.0'

# import logging
# logging.basicConfig(level=logging.DEBUG)

upload_history_name = 'Uploaded data'
upload_history_tag = 'user_data'
workflow_tag = 'islandcompare'
workflow_owner = 'brinkmanlab'
application_tag = 'IslandCompare'

ext_to_datatype = {
    "genbank": "genbank",
    "gbk": "genbank",
    "embl": "embl",
    "gbff": "genbank",
    "newick": "newick",
    "nwk": "newick",
}

WorkflowClient.set_max_get_retries(5)
HistoryClient.set_max_get_retries(5)
DatasetClient.set_max_get_retries(5)
JobsClient.set_max_get_retries(5)
InvocationClient.set_max_get_retries(5)


# ======== Patched bioblend functions ===========
# TODO Remove after upgrading to v0.16.0
def get_invocations(self, workflow_id, history_id=None, user_id=None,
                    include_terminal=True, limit=None, view='collection',
                    step_details=False):
    url = self._invocations_url(workflow_id)
    params = {'include_terminal': include_terminal, 'view': view,
              'step_details': step_details}
    if history_id:
        params['history_id'] = history_id
    if user_id:
        params['user_id'] = user_id
    if limit:
        params['limit'] = limit
    return self._get(url=url, params=params)
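# The function above looks like a backport meant to be monkey-patched onto the
# bioblend client until v0.16.0 is available; one assumed way to bind it:
WorkflowClient.get_invocations = get_invocations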