Example 1
 def create_clients(self):
     '''
     Create bioblend clients for the Galaxy instance.
     '''
     # Create first client and check if the API works
     self.config_client = ConfigClient(self.instance)
     try:
         self.config_client.get_version()
         self.config_client.get_config()
     except Exception:
         logger.error("Provided API-key does not work.")
         return False
     try:
         self.user_client = UserClient(self.instance)
         self.workflow_client = WorkflowClient(self.instance)
         self.tool_client = ToolClient(self.instance)
         self.toolshed_client = ToolShedClient(self.instance)
         self.library_client = LibraryClient(self.instance)
         self.roles_client = RolesClient(self.instance)
         self.history_client = HistoryClient(self.instance)
         self.dataset_client = DatasetClient(self.instance)
     except Exception:
         logger.error("Error initializing other bioblend clients.")
         return False
     return True
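Example 1 assumes a GalaxyHandler object (the full class appears in Example 9). A minimal standalone sketch of the same API check, using only documented bioblend calls; the URL and key are placeholders you must supply:

from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.config import ConfigClient

GALAXY_URL = "https://usegalaxy.org"  # placeholder
API_KEY = "your-api-key"              # placeholder

gi = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
config_client = ConfigClient(gi)
try:
    # get_version() needs no authentication; get_config() exercises the key
    config_client.get_version()
    config_client.get_config()
    print("API key works")
except Exception:
    print("Provided API key does not work")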
Example 2
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'],
                                       contents=True)
    #print(files)
    itemp = 0
    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        #initial set
        #if itemp == 31:
        #	break

        #print ("Name " + f['name'])

        replicate = f['name'].split('_')[-1].split('.')[0]
        #print replicate
        if replicate == '1':
            itemp = itemp + 1
            if not (itemp >= 71 and itemp <= 92):
                continue
            base = f['name'].split('_')[:-1]
            #print base
            forward_name = f['name']
            reverse_name = '_'.join(base) + '_2.fastq.bz2'
            forward_id = f['id']
            files2 = libraryClient.show_library(brassica_library[0]['id'],
                                                contents=True)
            for f2 in files2:
                if f2['name'] == reverse_name:
                    reverse_id = f2['id']
            print(forward_name)
            print(reverse_name)
            new_history_name = f['name'].split('_')[7] + "_" + f['name'].split(
                '_')[-3] + "_" + f['name'].split('_')[-2]
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(
                hist['id'], forward_id)
            dataset_R = histories.upload_dataset_from_library(
                hist['id'], reverse_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            datamap['1'] = {'src': 'hda', 'id': dataset_R['id']}
            workflows = workflowsClient.get_workflows(name="Maize HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'],
                                                 datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
Example 3
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    histories = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)

    brassica_library = libraryClient.get_libraries(
        name=' Evolutionary Systems Biology')
    files = libraryClient.show_library(brassica_library[0]['id'],
                                       contents=True)
    #print(files)

    for f in files:
        if f['type'] == 'folder':
            continue  # do nothing, try next
        #initial set
        #if itemp == 31:
        #	break

        #print ("Name " + f['name'])

        replicate = f['name'].split('/')[-1][0]
        #print replicate
        if replicate == 'X':

            base = f['name'].split('/')[-1].split('.')[0]
            #print base
            forward_name = f['name']
            forward_id = f['id']
            print(forward_name)

            new_history_name = base
            print(new_history_name)
            hist = histories.create_history(name=new_history_name)
            dataset_F = histories.upload_dataset_from_library(
                hist['id'], forward_id)
            datamap = {}
            datamap['0'] = {'src': 'hda', 'id': dataset_F['id']}
            workflows = workflowsClient.get_workflows(
                name="Maize Small samples HISAT 2.1")
            workflow = workflows[0]
            try:
                w = workflowsClient.run_workflow(workflow['id'],
                                                 datamap,
                                                 history_id=hist['id'])
            except Exception:
                print('Next')
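Both main() variants end with run_workflow(), which newer bioblend releases deprecate in favour of invoke_workflow(). A hedged sketch of the equivalent call; the helper name below is illustrative:

def invoke_with_datamap(workflow_client, workflow_id, datamap, history_id):
    # 'inputs' maps workflow input order ('0', '1', ...) to dataset descriptors,
    # the same shape as the datamap built in the loops above
    return workflow_client.invoke_workflow(workflow_id,
                                           inputs=datamap,
                                           history_id=history_id)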
Example 4
def sync_galaxy_files(user):
    print('check')
    # go through all the galaxylink files associated with the galaxy_instance_id
    gits = GalaxyInstanceTracking.objects.filter(galaxyuser__internal_user=user)

    # loop through galaxy instance
    for git in gits:
        print(git, 'GIT..................................')
        gflks = GalaxyFileLink.objects.filter(galaxyinstancetracking=git)

        gi, gu = get_gi_gu(user, git)
        # loop through galaxy files
        for gflk in gflks:
            dc = DatasetClient(gi)
            lc = LibraryClient(gi)

            if gflk.galaxy_library:

                mtch = dc.show_dataset(gflk.galaxy_id, hda_ldda='ldda')
                print('MATCH', mtch)
                if isinstance(mtch, dict):
                    if mtch['deleted']:
                        gflk.removed = True

                else:
                    gflk.removed = True


            else:
                mtch = dc.show_dataset(gflk.galaxy_id, hda_ldda='hda')
                if isinstance(mtch, dict) and (mtch['deleted'] or mtch['purged']):
                    gflk.removed = True

            gflk.save()
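The deleted/purged check above can be read in isolation. A sketch, assuming gi is an authenticated GalaxyInstance and dataset_id an existing dataset (or library dataset) id; the helper name is illustrative:

from bioblend.galaxy.datasets import DatasetClient

def dataset_removed(gi, dataset_id, in_library=False):
    # Library datasets are addressed as 'ldda', history datasets as 'hda'
    dc = DatasetClient(gi)
    ds = dc.show_dataset(dataset_id, hda_ldda='ldda' if in_library else 'hda')
    if not isinstance(ds, dict):
        return True
    return bool(ds.get('deleted') or ds.get('purged'))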
Example 5
def galaxy_isa_upload_datalib(pks,
                              galaxy_isa_upload_param,
                              galaxy_pass,
                              user_id,
                              celery_obj=''):

    # Should this be admin-only, or should all users have this ability? Not sure.

    # update celery
    if celery_obj:
        celery_obj.update_state(state='RUNNING',
                                meta={
                                    'current': 0.1,
                                    'total': 100,
                                    'status': 'Initialising galaxy'
                                })

    # get the galaxy clients required for updating the galaxy instance
    git = galaxy_isa_upload_param.galaxyinstancetracking
    gi, gu = get_gi_gu(galaxy_isa_upload_param.added_by, git)
    lc = LibraryClient(gi)

    # Retrieve or create the base library used for all ISA folders
    lib = create_library(lc, 'mogi')

    # get all associated files for the selected ISA projects
    mfiles = get_mfile_qs(pks)
    # Add the files to the Galaxy data library

    try:
        create_isa_datalib(mfiles, lib, gi, gu, galaxy_pass,
                           galaxy_isa_upload_param, user_id, celery_obj)
    except error_perm as e:
        print('ERROR CATCH', e)
        if celery_obj:
            celery_obj.update_state(state='FAILURE',
                                    meta={
                                        'current': 0.0,
                                        'total': 100,
                                        'status': 'Failed {}'.format(e)
                                    })
        return 0
    except bioblend.ConnectionError as e:
        print('ERROR CATCH', e)
        if celery_obj:
            celery_obj.update_state(state='FAILURE',
                                    meta={
                                        'current': 0.0,
                                        'total': 100,
                                        'status': 'Failed {}'.format(e)
                                    })
        return 0

    return 1
Example 6
def main():
    galaxyInstance = GalaxyInstance(url=GALAXY_URL, key=API_KEY)
    toolClient = ToolClient(galaxyInstance)
    historyClient = HistoryClient(galaxyInstance)
    workflowsClient = WorkflowClient(galaxyInstance)
    libraryClient = LibraryClient(galaxyInstance)
    datasetClient = DatasetClient(galaxyInstance)

    histories = historyClient.get_histories(deleted=False)
    for hist in histories:
        hist_id = hist['id']
        countSecondary = historyClient.show_matching_datasets(
            hist_id, name_filter=name_filter)
        if len(countSecondary) != 0:
            #print(countSecondary)
            file_path = dir_name + '/' + hist['name'] + '_' + name_filter + '.' + ext
            #print(file_path)
            #print(countSecondary[0]['dataset_id'])
            datasetClient.download_dataset(countSecondary[0]['id'],
                                           file_path=file_path,
                                           use_default_filename=False)
    sys.exit()
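dir_name, name_filter and ext are globals defined outside this snippet. The download step alone, as a sketch with explicit parameters; the helper name is illustrative:

import os
from bioblend.galaxy.datasets import DatasetClient

def download_named_dataset(gi, dataset_id, dir_name, base_name, ext):
    # Build the target path explicitly instead of relying on globals
    dc = DatasetClient(gi)
    file_path = os.path.join(dir_name, '%s.%s' % (base_name, ext))
    dc.download_dataset(dataset_id, file_path=file_path,
                        use_default_filename=False)
    return file_path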
Example 7
def f2dl_action(gfile_ids, f2dl_param, galaxy_pass):

    # get selected files
    selected_files = GenericFile.objects.filter(pk__in=gfile_ids)
    galaxy_folder = f2dl_param.folder_name

    # get the Galaxy Bioblend clients
    git = f2dl_param.galaxyinstancetracking
    gi, gu = get_gi_gu(f2dl_param.added_by, git)

    lc = LibraryClient(gi)

    # Create base library (just output the lib bioblend object if already created)
    lib = create_library(lc, f2dl_param.added_by.username)

    # get full paths from database
    filelist = files2paths(selected_files)
    if not filelist:
        print('filelist empty')
        return []

    # Create the folders in the Galaxy data library (can be nested if the user used forward slashes)
    folders = galaxy_folder.split('/')
    folder_id = create_folders(lc, lib['id'], base_f_id=None, folders=folders)
    lib_id = lib['id']

    # upload the files to the folder
    uploaded_files = add_filelist_datalib(filelist, f2dl_param, lc, gu, gi,
                                          galaxy_pass, lib_id, folder_id,
                                          galaxy_folder)

    # link files to django database
    link_files_in_galaxy(uploaded_files, selected_files, git, library=True)

    # check that purged files are referenced in the database correctly
    sync_galaxy_files(f2dl_param.added_by)
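create_folders() is a project helper whose definition is not shown. A sketch of how nested folders can be created with the plain LibraryClient API; the name and signature below are illustrative, not the project's actual code:

def create_nested_folders(lc, library_id, folders, base_folder_id=None):
    # LibraryClient.create_folder returns a one-element list of folder dicts
    parent_id = base_folder_id
    for name in folders:
        folder = lc.create_folder(library_id, name, base_folder_id=parent_id)
        parent_id = folder[0]['id']
    return parent_id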
Example 8
def runWorkflow(argDictionary, comparisons):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import time
    import csv
    
    api_key = ''
    galaxy_host = 'http://localhost:8080/'

    gi = GalaxyInstance(url=galaxy_host, key=api_key)

    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    
    history = history_client.create_history(argDictionary['accessionNumber'])
    # Import the galaxy workflow
    workflow = workflow_client.show_workflow('a799d38679e985db')

    input_file = tool_client.upload_file(comparisons, history['id'], file_type='txt')

    # Run workflow on csv data to create a new history.
    params = dict()
    for key in workflow['steps'].keys():
        params[key] = argDictionary
    
    datamap = {'1' : {'id': input_file['outputs'][0]['id'], 'src': 'hda'}}

    workflow_client.invoke_workflow(workflow['id'], inputs = datamap, history_id = history['id'], params = params)
    
    # A dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    
    dataset_id = getFoldChangeData(history, history_client)['id']

    
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(getMostRecentDatasetByName('PCAplot.png', history, history_client)['id']),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable' : {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber' : comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
        
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    if argDictionary['species'] in ["Rat","Cow","Horse","Pig","Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
        
        if argDictionary['species'] == "Rat":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="ratGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.rat.txt")
        if argDictionary['species'] == "Cow":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="cowGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.cow.txt")
        if argDictionary['species'] == "Horse":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="horseGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.horse.txt")
        if argDictionary['species'] == "Pig":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigStringNetwork.txt")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="pigGeneLengths.tabular")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.pig.txt")
        if argDictionary['species'] == "Zebrafish":
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="zebrafishGeneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="HOM_AllOrganism.rpt")
        
                
        pathwayDatamap = {'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)                  
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
                
            return_dict = {'accessionNo':argDictionary['accessionNumber'],
                           'factor':comparisonDict['Factor'],
                           'comparisonNum':comparisonDict['Numerator'],
                           'comparisonDenom':comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
            history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
            history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png',
            history, history_client)['id'])
            return_collection.append(return_dict)     
       
        # Hard code keys to define the order
        keys = ['accessionNo','factor','comparisonNum','comparisonDenom','PCA','chrDirTable','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots','enrichmentTable','slimEnrichmentPathways','slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        return return_collection
    else: 
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
            
        # MouseGeneLengths.tab has id 457f69dd7016f307 - step 2 of workflow
        # Mouse interactome has id 073be90ac6c3bce5 - step 0 of workflow
        
        if argDictionary['species'] == "Mouse":  
    
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt")
            
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
        else:
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
    
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):
            
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)
            
            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}

            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'], 
                                            inputs = pathwayDatamap, 
                                            history_id = history['id'], 
                                            params = params)                  
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            if 'Paired1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Paired1']
                
            return_dict = {'accessionNo':argDictionary['accessionNumber'],
                           'factor':comparisonDict['Factor'],
                           'comparisonNum':comparisonDict['Numerator'],
                           'comparisonDenom':comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           'interactome': pathwayDatamap['0']['id'],
                           'exonLength': pathwayDatamap['2']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
            history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', 
                history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', 
                history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', 
                history, history_client)['id'])
            return_dict['slimEnrichmentPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
            history, history_client)['id'])
            return_dict['slimEnrichmentPlot'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPlot.png',
            history, history_client)['id'])
            return_collection.append(return_dict)     
       
        # Hard code keys to define the order
        keys = ['accessionNo','factor','comparisonNum','comparisonDenom','PCA','chrDirTable','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots','pathways','enrichPlot','enrichmentTable','slimEnrichmentPathways','slimEnrichmentPlot']
        with open('output/' + argDictionary['accessionNumber'] + '-workflowOutput.csv', 'w', newline='') as csvFile:
            # Get headers from last dictionary in collection as first doesn't contain all keys
            csvOutput = csv.DictWriter(csvFile, keys)
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        return return_collection
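getNumberNotComplete(), getNumberComplete(), getUrl() and the other helpers used above are defined elsewhere in the project. A plausible minimal version of the polling helper, using only the documented HistoryClient API (the real implementation may differ):

def getNumberNotComplete(history_id, history_client):
    # Count history datasets that are not yet in a terminal state
    datasets = history_client.show_history(history_id, contents=True)
    return sum(1 for d in datasets if d['state'] not in ('ok', 'error'))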
Example 9
class GalaxyHandler:
    '''
    This class represents a Galaxy instance and provides functions to interact with that instance.
    '''
    def __init__(self, url, api_key, container_file=None, oci_bundle=False):
        self.url = url
        self.api_key = api_key
        self.container_file = container_file
        self.oci_bundle = oci_bundle
        # Track whether the containerized Galaxy daemon is running
        self.instance_running = False

        # Bioblend GalaxyInstance
        self.instance = None
        # Bioblend Clients
        self.user_client = None
        self.config_client = None
        self.workflow_client = None
        self.tool_client = None
        self.toolshed_client = None
        self.library_client = None
        self.roles_client = None
        self.history_client = None
        self.dataset_client = None

    def start_container_galaxy(self, writable=False, binds=None):
        '''
        Run a containerized Galaxy instance.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                subprocess.call([
                    "sh", "/galaxy/run.sh", "--log-file", "/output/paster.log",
                    "--pid-file", " /output/paster.pid", "--daemon"
                ],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT)
            else:
                if writable:
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w",
                        self.container_file, "sh", "/galaxy/run.sh", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                elif binds:
                    subprocess.call([
                        "singularity", "exec", "--bind", binds,
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--log-file", "/output/paster.log", "--pid-file",
                        " /output/paster.pid", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                else:
                    subprocess.call([
                        "singularity", "exec", self.container_file, "sh",
                        "/galaxy/run.sh", "--daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)

            # Wait until the Galaxy instance is available but do not wait longer than 1 minute
            response = None
            t = 0
            while not response:
                try:
                    response = urllib.urlopen(
                        self.url).getcode()  # returns 200 if galaxy is up
                except Exception:
                    if t > 60:
                        logger.error(
                            "Galaxy is not up after 1 minute. Something went wrong. Maybe the container is corrupted. Try to open a shell in writable mode in the container and start Galaxy from the shell"
                        )
                        exit(1)
                    else:
                        # Wait 5s until Galaxy is up
                        logger.info(
                            "Galaxy is not up ... wait 5 seconds and try again"
                        )
                        t = t + 5
                        time.sleep(5)
                        response = None
                        continue
            self.instance_running = True
        return

    def stop_container_galaxy(self, sudo=False, bind_dirs=None, tmp_dir=None):
        '''
        Stop a running containerized Galaxy instance.
        Remove the temporary directory, if one is given.
        '''
        with open(os.devnull, 'w') as FNULL:
            if self.oci_bundle:
                # No binds, no Singularity, just plain run.sh stop-daemon
                subprocess.call(["sh", "/galaxy/run.sh", "--stop-daemon"],
                                stdout=FNULL,
                                stderr=subprocess.STDOUT)
                self.instance_running = False
                time.sleep(5)
            else:
                if sudo:
                    # We use sudo only for importing workflows, so no binds.
                    subprocess.call([
                        "sudo", "singularity", "exec", "-w",
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--stop-daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)
                else:
                    # We use this only for workflow execution
                    subprocess.call([
                        "singularity", "exec", "--bind", bind_dirs,
                        self.container_file, "sh", "/galaxy/run.sh",
                        "--log-file", "/output/paster.log", "--pid-file",
                        " /output/paster.pid", "--stop-daemon"
                    ],
                                    stdout=FNULL,
                                    stderr=subprocess.STDOUT)
                    self.instance_running = False
                    time.sleep(5)

        # Remove temporary directories
        if tmp_dir:
            logger.info("Remove temporary directory: %s", tmp_dir)
            shutil.rmtree(tmp_dir)

        return

    def create_galaxy_instance(self, check_admin=False):
        '''
        Create a bioblend GalaxyInstance.
        The check_admin flag is reserved for verifying that the user is an
        admin of the Galaxy instance.
        Returns False if an error occurs, True on success.
        '''
        # Check if the URL is valid
        if not check_url(self.url):
            logger.error("URL to galaxy instance is not a valid URL: %s",
                         self.url)
            return False
        # Try to create a bioblend Galaxy instance
        try:
            self.instance = GalaxyInstance(url=self.url, key=self.api_key)
        except Exception:
            logger.error("Cannot create Galaxy instance.")
            return False
        return True

    def create_clients(self):
        '''
        Create bioblend clients for the Galaxy instance.
        '''
        # Create first client and check if the API works
        self.config_client = ConfigClient(self.instance)
        try:
            self.config_client.get_version()
            self.config_client.get_config()
        except Exception:
            logger.error("Provided API-key does not work.")
            return False
        try:
            self.user_client = UserClient(self.instance)
            self.workflow_client = WorkflowClient(self.instance)
            self.tool_client = ToolClient(self.instance)
            self.toolshed_client = ToolShedClient(self.instance)
            self.library_client = LibraryClient(self.instance)
            self.roles_client = RolesClient(self.instance)
            self.history_client = HistoryClient(self.instance)
            self.dataset_client = DatasetClient(self.instance)
        except Exception:
            logger.error("Error initializing other bioblend clients.")
            return False
        return True

    def initialize(self):
        '''
        Initialize bioblend GalaxyInstance, clients, and check if the API works.
        Returns False if something went wrong.
        '''
        if not self.create_galaxy_instance():
            logger.error(
                "Cannot create bioblend GalaxyInstance for the GalaxyHandler")
            return False
        if not self.create_clients():
            logger.error(
                "Cannot create bioblend clients for the GalaxyHandler")
            return False
        return True

    def create_user(self, name, mail, password):
        '''
        Create a new Galaxy user for a specific Galaxy instance.
        Return the user_id and an api-key.
        '''
        try:
            new_user = self.user_client.create_local_user(name, mail, password)
        except ConnectionError as e:
            if "already exists" in e.body:
                # User already exists, fetch the existing account instead
                new_user = self.user_client.get_users(f_email=mail)[0]
            else:
                raise
        new_user_id = new_user['id']

        # Create API key for that user
        new_user_api_key = self.user_client.create_user_apikey(new_user_id)

        return (new_user_id, new_user_api_key)

    def create_input_library(self, name, user):
        '''
        Create a dataset library for this instance.
        '''
        try:
            # Create the library
            new_library = self.library_client.create_library(name,
                                                             description=None,
                                                             synopsis=None)
            logger.info("new_library ok")
            # Get the role of the user
            user_role_id = self.roles_client.get_roles()[0]['id']
            logger.info("user_role_id ok")
            # Set permissions for that library
            # The following settings will enable the upload of input data by the user to this library
            self.library_client.set_library_permissions(
                library_id=new_library['id'],
                access_in=user_role_id,
                modify_in=user_role_id,
                add_in=user_role_id,
                manage_in=user_role_id)
            return True
        except Exception:
            logger.error("Cannot create Galaxy data library")
            return False

    def create_history(self, name):
        '''
        Create a history and return the history id
        '''
        history_dict = self.history_client.create_history(name)
        return history_dict['id']

    def create_folder(self, library_name, user_mail):
        '''
        Create a folder for the files in a library.
        This is used to store files for a Galaxy library.
        Return a tuple containing the library id and the folder id.
        '''
        # Assume that there is just one library with this name
        library = self.library_client.get_libraries(library_id=None,
                                                    name=library_name,
                                                    deleted=False)[0]
        folder = self.library_client.create_folder(library['id'], user_mail)
        return library['id'], folder[0]['id']

    def upload_workflow_input(self,
                              workflow_input,
                              library_id,
                              folder_id,
                              mount_input_dir=True,
                              input_dir=None):
        '''
        Upload the input data for a workflow to Galaxy.
        The files are uploaded from the filesystem to a folder of a Galaxy library.
        The files are not duplicated, because only symbolic links are created.
        If users provide their own data, the files are 'uploaded' from the /input directory,
        which is just a mount point for a directory outside the container.
        If a user wants to use test data provided with the container, mount_input_dir is False
        and the directory inside the container has to be specified.
        '''
        for step_uuid, step_param in workflow_input.items():
            if step_param['step_type'] == 'data_input':
                if mount_input_dir:
                    # Input data is mounted in the container
                    path = os.path.join('/input', step_param['filename'])
                else:
                    # input_dir exists inside the container (e.g. workflow test data)
                    path = os.path.join(input_dir, step_param['filename'])
                logger.info("Next upload: " + path)
                workflow_input[step_uuid][
                    'dataset_id'] = self.library_client.upload_from_galaxy_filesystem(
                        library_id,
                        path,
                        folder_id=folder_id,
                        file_type=step_param['galaxy_file_type'],
                        link_data_only='link_to_files')

    def export_output_history(self, history_id, output_dir):
        '''
        Export all datasets of a history to the output directory.
        '''
        # Get a list of all datasets in the output history
        history_datasets = self.history_client.show_history(history_id,
                                                            contents=True,
                                                            deleted=None,
                                                            visible=None,
                                                            details=None,
                                                            types=None)

        # Iterate over the datasets of the history and download each dataset that has 'ok' state (e.g. the tool completed)
        for dataset in history_datasets:
            # Check the dataset status, e.g. if the corresponding task completed. Do not download input datasets!
            if dataset['state'] == 'ok':
                logger.info("Download dataset %s, state: %s", dataset['name'],
                            dataset['state'])
                self.dataset_client.download_dataset(dataset['id'],
                                                     file_path=output_dir,
                                                     use_default_filename=True,
                                                     wait_for_completion=False,
                                                     maxwait=12000)
            else:
                logger.info("Do not download dataset %s, state: %s",
                            dataset['name'], dataset['state'])
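Taken together, the handler is used roughly as below; a sketch assuming a reachable Galaxy instance, with placeholder URL, key and credentials:

handler = GalaxyHandler("http://localhost:8080", "admin-api-key")  # placeholders
if not handler.initialize():
    exit(1)
user_id, user_api_key = handler.create_user("alice", "alice@example.org", "secret")
history_id = handler.create_history("workflow-run")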
Example 10
def create_isa_datalib(mfiles,
                       lib,
                       gi,
                       gu,
                       galaxy_pass,
                       galaxy_isa_upload_param,
                       user_id,
                       celery_obj=''):
    name_map = get_namemap()
    igrps = group_by_keys(mfiles, (name_map['investigation'], ))

    file_count = 0

    lc = LibraryClient(gi)
    fc = FoldersClient(gi)

    for igrp in igrps:
        # get the investigation name of the group, and create folder
        ifolder, sgrps = create_investigation_folder(igrp, lc, fc, lib,
                                                     galaxy_isa_upload_param,
                                                     name_map)

        # create samplelist (and create relevant misa file)
        samplelist_misafile_id = create_samplelist(user_id, igrp)

        # Upload all generic MISA files (including the one above we just created)
        investigation_n = igrp[0][name_map['investigation']]

        misa_files = MISAFile.objects.filter(
            investigation__name=investigation_n)

        for misafile in misa_files:
            save_to_galaxy([misafile], galaxy_isa_upload_param, lc, gu, gi,
                           galaxy_pass, lib['id'], ifolder['id'],
                           misafile.original_filename, True)

        for sgrp in sgrps:
            # get the study name of the group and create folder
            sfolder, agrps = create_study_folder(sgrp, lc, lib, name_map,
                                                 ifolder)

            for agrp in agrps:

                study_n = agrp[0][name_map['study']]
                investigation_n = agrp[0][name_map['investigation']]
                assay_n = agrp[0][name_map['assay']]

                full_assay_name = '{}_{}_{}'.format(study_n, investigation_n,
                                                    assay_n)

                if celery_obj:
                    if file_count == 0:
                        count = 0.1
                    else:
                        count = file_count
                    celery_obj.update_state(state='RUNNING',
                                            meta={'current': count,
                                                  'total': len(mfiles) + 1,
                                                  'status': 'Assay: {}'.format(assay_n)})

                afolder = create_assay_folder(agrp, lc, lib, name_map, sfolder)

                data_lib_files = save_to_galaxy(agrp, galaxy_isa_upload_param,
                                                lc, gu, gi, galaxy_pass,
                                                lib['id'], afolder['id'],
                                                full_assay_name)
                file_count += len(data_lib_files)
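The Celery progress reporting above recurs in several examples. A small helper capturing the pattern (update_state is the standard Celery task API; the helper name is mine):

def report_progress(celery_obj, current, total, status):
    # celery_obj is a bound Celery task, or '' when running synchronously
    if celery_obj:
        celery_obj.update_state(state='RUNNING',
                                meta={'current': current,
                                      'total': total,
                                      'status': status})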
Example 11
                    remote_dataset=True,
                    folder_id=folder_id,
                    uuid_list=uuid_string,
                    remote_dataset_type_list=remote_dataset_type_string,
                    file_size_list=file_size_string,
                    line_count_list=line_count_string)


if __name__ == "__main__":
    if (len(sys.argv) < 3):
        sys.stderr.write(
            'Needs 2 arguments : <Galaxy_library_name> <csv_file>\n')
        sys.exit(-1)
    #Connect to Galaxy
    gi = GalaxyInstance(url=galaxy_key.galaxy_host, key=galaxy_key.galaxy_key)
    li = LibraryClient(gi)

    library_name = sys.argv[1]
    csv_file = sys.argv[2]
    try:
        fptr = open(csv_file, 'rb')
    except IOError:
        sys.stderr.write('Could not open CSV file\n')
        sys.exit(-1)

    upload_file_info_list = []
    #try:
    #dialect,id_field_name,fieldnames = import_datasets_by_uuid.check_and_return_header(fptr, delimiter=',');
    #csv_reader = csv.DictReader(fptr, fieldnames=fieldnames, dialect=dialect);
    #for row in csv_reader:
    #info = UploadFileInfo();
Example 12
def main():
    parser = OptionParser()
    parser.add_option("-A",
                      "--auth-file",
                      dest="auth_filename",
                      help="JSON file with Galaxy host and key",
                      metavar="FILE")
    parser.add_option(
        "-f",
        "--uuid-file",
        dest="uuids_filename",
        help=
        "TSV file with list of UUIDs to import. The first row is assumed to be a header",
        metavar="FILE")
    parser.add_option(
        "-H",
        "--target-history",
        dest="target_history",
        help="Target history name in Galaxy to copy datasets into",
        metavar="HISTORY_NAME")
    (options, args) = parser.parse_args()
    if (not options.auth_filename):
        print_error_and_exit('Authentication file not provided')
    #if(not options.uuids_filename):
    #print_error_and_exit('TSV file with UUIDs not provided');
    if (not options.target_history):
        print_error_and_exit(
            'Galaxy history name where datasets will be imported not provided')

    #Read authentication info
    galaxy_host, galaxy_key = parse_auth_file(options.auth_filename)

    gi = GalaxyInstance(url=galaxy_host, key=galaxy_key)
    history_client = HistoryClient(gi)
    library_client = LibraryClient(gi)
    folder_client = FoldersClient(gi)

    #Read UUIDs file
    if (options.uuids_filename):
        try:
            uuids_fd = open(options.uuids_filename, 'rb')
        except IOError:
            print_error_and_exit('Could not open TSV file with UUIDs ' +
                                 options.uuids_filename)
    else:
        uuids_fd = sys.stdin
    queried_ds_uuid_dict = parse_TSV_file(uuids_fd)

    #Search for datasets
    find_datasets_by_uuids_in_histories(gi, history_client,
                                        queried_ds_uuid_dict)
    find_datasets_by_uuids_in_libraries(gi, library_client,
                                        queried_ds_uuid_dict)

    dataset_info_list = queried_ds_uuid_dict.values()
    #Validate datasets, discard repeats
    validate_queried_dataset_info(dataset_info_list)

    #Get/create target history
    target_history_id = get_or_create_history_id(gi, history_client,
                                                 options.target_history)
    #Copy datasets from library to history
    copy_from_lib(gi,
                  history_client,
                  dataset_info_list,
                  target_history_id=target_history_id)
    #Copy from history to /tmp and back - don't use anymore
    #copy_to_tmp_lib_and_back(gi, library_client, history_client, folder_client, '/tmp', dataset_info_list, target_history_id=target_history_id);
    #Copy history datasets from other histories
    copy_other_history_datasets(gi,
                                history_client,
                                dataset_info_list,
                                target_history_id=target_history_id)
    #Create dataset collections
    create_dataset_collections(gi,
                               history_client,
                               dataset_info_list,
                               target_history_id=target_history_id)
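optparse has been deprecated since Python 2.7. The same command line defined with argparse would look roughly like this:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-A', '--auth-file', dest='auth_filename', metavar='FILE',
                    help='JSON file with Galaxy host and key')
parser.add_argument('-f', '--uuid-file', dest='uuids_filename', metavar='FILE',
                    help='TSV file with list of UUIDs to import; '
                         'the first row is assumed to be a header')
parser.add_argument('-H', '--target-history', dest='target_history',
                    metavar='HISTORY_NAME',
                    help='Target history name in Galaxy to copy datasets into')
options = parser.parse_args()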
Example 13
parser = SafeConfigParser()

if len(sys.argv) >= 2:
    if sys.argv[1].endswith('.ini'):
        parser.read(sys.argv[1])
    else:
        print "You passed %s I need a .ini file" %(sys.argv[1],)
        sys.exit(1)
else:
    parser.read('configuration.ini')

api_key = get_api_key(parser.get('Globals', 'api_file'))


galaxy_host = parser.get('Globals', 'galaxy_host')
galaxyInstance = GalaxyInstance(galaxy_host, key=api_key)
libraryInstance = LibraryClient(galaxyInstance)
libs = libraryInstance.get_libraries(name=parser.get('Globals','default_lib'))
details = libraryInstance.get_folders(library_id=libs[0]['id'])
folder = libraryInstance.show_library(library_id=libs[0]['id'],contents=True)
for f in folder[1:]:
    print "%s:%s" % (f['name'],f['id'])

workflow = WorkflowClient(galaxyInstance)
wf = workflow.get_workflows()
for w in wf:
    print(w['id'])
    print(w['name'])
    print(w['owner'])

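SafeConfigParser is a deprecated alias that was removed in Python 3.12. Under Python 3 the same setup reads as follows (a sketch; get_api_key is the example's own helper):

from configparser import ConfigParser

parser = ConfigParser()
parser.read('configuration.ini')
api_key = get_api_key(parser.get('Globals', 'api_file'))
galaxy_host = parser.get('Globals', 'galaxy_host')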
Example 14
def runWorkflow(argDictionary, comparisons,samples):
    from bioblend.galaxy import GalaxyInstance
    from bioblend.galaxy.histories import HistoryClient
    from bioblend.galaxy.tools import ToolClient
    from bioblend.galaxy.workflows import WorkflowClient
    from bioblend.galaxy.libraries import LibraryClient
    import tempfile
    import csv
    
    
    import time
    api_key = ''
    galaxy_host = ''

    gi = GalaxyInstance(url=galaxy_host, key=api_key)

    history_client = HistoryClient(gi)
    tool_client = ToolClient(gi)
    workflow_client = WorkflowClient(gi)
    library_client = LibraryClient(gi)
    
    history = history_client.create_history(argDictionary['accessionNumber'])
    
    comparisonsTable = tool_client.upload_file(comparisons, history['id'], file_type='txt')
    sampleTable = tool_client.upload_file(samples, history['id'], file_type='tabular')
    
    if argDictionary['site'] == "ENA":
        # fastqs are available on ENA
        tool_inputs = {
            "accessionNumber": argDictionary["ENA"],
            "sampleTable": {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'}
        }
        
    
        #run the tool to get the data from ENA
        tool_client.run_tool(history['id'],'getRNASeqExpressionData', tool_inputs)
        
        #we want to wait until we have all datasets
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
            
        
        #sleep until all the fastq files are findable
        time.sleep(120)
        
        
        dirpath = tempfile.mkdtemp()
        fileList = getDatasetsByApproxName("files.tabular", history,history_client)[0]
        fileList = history_client.download_dataset(history["id"],fileList["id"],dirpath)
        num_lines = sum(1 for line in open(fileList)) -1
        
        datasets = list()
        while len(datasets) != num_lines:
            time.sleep(10)
            datasets = getDatasetsByApproxName("fastq", history, history_client)
    else: #for SRA       
    
        if argDictionary['single'] == "TRUE":
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    print (sample)
                    fileNames=str.split(sample["File"],"|")
                    for fileName in fileNames:                    
                        tool_inputs = {
                                "input|input_select":"accession_number",
                                "outputformat":"fastqsanger.gz",
                                "input|accession":fileName   
                            }
                        #run the tool to get the single data from SRA
                        tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fastq_dump/2.8.1.3', tool_inputs)
               
        else:
            with open(samples) as tsvfile:
                reader = csv.DictReader(tsvfile, delimiter='\t')
                for sample in reader:
                    tool_inputs = {
                        "accession_number": sample["File"]
                    }
                    # run the tool to get the paired data from SRA
                    tool_client.run_tool(history['id'], 'toolshed.g2.bx.psu.edu/repos/mandorodriguez/fastqdump_paired/fastq_dump_paired/1.1.4', tool_inputs)
                
        while getNumberNotComplete(history['id'], history_client) > 0:
            time.sleep(10)
     
    datasets = getDatasetsByApproxName("fastq",history,history_client )
    # run FastQC on each fastq dataset
    for fastq in datasets:
        try:
            tool_inputs = {'input_file' : {'id': fastq['id'], 'src': 'hda'}}
            tool_client.run_tool(history['id'],'toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69', tool_inputs)
        except Exception:
            pass
        
    #wait till complete
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    
    #make dataset collections for quantification using the fastq files
    collections=list()
    with open(samples) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        for row in reader:
            datasets=list()
            fileNames=str.split(row["File"],"|")
            
            for fileName in fileNames:
                datasets= datasets + getDatasetsByApproxName(fileName,history,history_client )
                    
            #make list of datasets
            collections.append(makeDataSetCollection(datasets,row["Sample"],history,history_client))
            
            
            
    #get the correct kallisto index
    species = argDictionary['species'].lower()
    index = getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name=species +"IndexFile")
    index = {'id': index, 'src': 'hda'}
    
    #run kallisto for every dataset collection
    for collection in collections:
        #set up the tool_inputs
        tool_inputs = {'index': index,
                       'inputs': {'id': collection['id'], 'src': 'hdca'},
                       "single": argDictionary["single"],
                       "stranded": argDictionary["stranded"]}
        
        
        #often encounter connection broken error - possible problem with Certus server?
        #bypass by ignoring the exception
        tool_client.run_tool(history['id'],'kallistoQuant', tool_inputs)


    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
    # Run multiqc on kallisto logs and fastqc files
    datasets = getDatasetsByApproxName("RawData",history,history_client )
    kallistoLogs = getDatasetsByApproxName(".log", history, history_client)
    
    tool_inputs = {}
    for i, dataset in enumerate(datasets+kallistoLogs):
        if not dataset["deleted"]:
            if dataset in datasets:
                software = 'fastqc'
            else:
                software = 'kallisto'
            params = {'id' : dataset['id'], 'src': 'hda', 'name': dataset['name']}
            tool_inputs.update({'results_%s|software_cond|software' % i: software, 'results_%s|input_file' % i: params})

    # summarise with the multiQC tool
    tool_client.run_tool(history['id'],'multiqc', tool_inputs)
    
    multiQc = getDatasetsByApproxName("multiqc",history,history_client)[0]
    
        
    #get all the abundance files to convert to gene level counts matrix
    datasets = getDatasetsByApproxName(".abundance",history,history_client )
    
    #make a dataset collection for to make a countsMatrix
    collection = makeDataSetCollection(datasets,"abundances",history,history_client)
    
    
    #set up the tool_inputs
    tool_inputs = {'inputs' : {'id': collection['id'], 'src': 'hdca'} ,"species":argDictionary['species']}
    
    #convert abundances to gene level counts matrix
    tool_client.run_tool(history['id'],'KallistoAbundancestoGeneCountMatrix', tool_inputs)
    
    # A dirty hack: wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
    
    txi = getDatasetsByApproxName("txi",history,history_client)
    

    #set up the tool_inputs for PCA
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    
    #run deseq2
    tool_client.run_tool(history['id'],'PCARNASeq', tool_inputs)
    
    pca = getDatasetsByApproxName("PCA",history,history_client)[0]
    
       
    #set up the tool_inputs for DESeq2
    tool_inputs = {'txiData': {'id': txi[0]['id'], 'src': 'hda'},
                   'sampleTable': {'id': sampleTable['outputs'][0]['id'], 'src': 'hda'},
                   'comparisonsTable': {'id': comparisonsTable['outputs'][0]['id'], 'src': 'hda'},
                   "foldChangeOnly": argDictionary['foldChangeOnly'],
                   "species": argDictionary['species'],
                   'technicalReplicates': argDictionary['technicalReplicates'],
                   'batchCorrect': argDictionary['batchCorrect']}
    
    #run deseq2
    tool_client.run_tool(history['id'],'DESeq2FoldChange', tool_inputs)
         
    #run chrdir
    tool_client.run_tool(history['id'],'characteristicDirectionRNASeq', tool_inputs)
    
    # we want to wait until we have all datasets
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
        
    #get the foldchange data, cut and run pathway workflow    
    dataset_id = getFoldChangeData(history, history_client)['id']
    
    
    return_collection = [{'accessionNo': argDictionary['accessionNumber'],
                          'foldChange': getUrl(dataset_id),
                          'PCA': getUrl(pca["id"]),
                          'chrDirTable': getUrl(getMostRecentDatasetByName('chrDirTable.tabular', history, history_client)['id'])}]
    
    
    number_of_comparisons = -1
    for line in open(comparisons):
        if not line.isspace():
            number_of_comparisons += 1

    for comparison in range(0, int(number_of_comparisons)):
        tool_inputs = {
            'foldChangeTable' : {'id': dataset_id, 'src': 'hda'},
            'comparisonNumber' : comparison + 1
        }
        tool_client.run_tool(history['id'], 'cutFoldChangeTable', tool_inputs)
        
    while getNumberNotComplete(history['id'], history_client) > 0:
        time.sleep(10)
        
        
    if argDictionary['species'] in ["Rat", "Cow", "Horse", "Pig", "Zebrafish"]:
        pathwayAnalysisWorkflow = workflow_client.show_workflow('c9468fdb6dc5c5f1')

        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary

        # species-specific reference files held in the shared data library:
        # (STRING network, gene lengths, homology table)
        referenceFiles = {
            "Rat": ("ratStringNetwork", "ratGeneLengths", "HOM_AllOrganism.rpt"),
            "Cow": ("cowStringNetwork", "cowGeneLengths", "HOM_AllOrganism.rpt"),
            "Horse": ("horseStringNetwork", "horseGeneLengths", "Homology.horse.txt"),
            "Pig": ("pigStringNetwork.txt", "pigGeneLengths.tabular", "Homology.pig.txt"),
            "Zebrafish": ("zebrafishStringNetwork", "zebrafishGeneLengths", "HOM_AllOrganism.rpt")
        }
        networkName, geneLengthsName, homologyName = referenceFiles[argDictionary['species']]
        network = getLibraryToolDataID(history=history, history_client=history_client,
                                       library_client=library_client, name=networkName)
        geneLengths = getLibraryToolDataID(history=history, history_client=history_client,
                                           library_client=library_client, name=geneLengthsName)
        homology = getLibraryToolDataID(history=history, history_client=history_client,
                                        library_client=library_client, name=homologyName)

        # workflow input slots 1-3 are the reference files; slot '0' (the cut
        # fold change table) is filled in per comparison below
        pathwayDatamap = {'3': {'id': homology, 'src': 'hda'},
                          '2': {'id': network, 'src': 'hda'},
                          '1': {'id': geneLengths, 'src': 'hda'}}
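        # `getLibraryToolDataID` is defined earlier; it presumably finds a file
        # by name in the shared data library and imports it into the history,
        # returning the new HDA id, roughly:
        #   def getLibraryToolDataID(history, history_client, library_client, name):
        #       for lib in library_client.get_libraries():
        #           for item in library_client.show_library(lib['id'], contents=True):
        #               if item['name'].endswith(name):
        #                   return history_client.upload_dataset_from_library(
        #                       history['id'], item['id'])['id']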

        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            # the pathway workflow is expected to add 10 datasets to the
            # history; used below as the completion target
            numCompleted = getNumberComplete(history['id'], history_client) + 10
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            
            
            comparisonDict = getRowFromCsv(comparisons, index)
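            # `getRowFromCsv` (used above) can be sketched with csv.DictReader,
            # assuming the comparisons file has a header row:
            #   def getRowFromCsv(csvFile, rowNumber):
            #       with open(csvFile) as f:
            #           for i, row in enumerate(csv.DictReader(f)):
            #               if i == rowNumber:
            #                   return row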
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           # pathwayDatamap slot '2' is the STRING network and
                           # slot '1' the gene-length table (see above)
                           'interactome': pathwayDatamap['2']['id'],
                           'exonLength': pathwayDatamap['1']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
                history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular',
                history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html',
                history, history_client)['id'])
            return_collection.append(return_dict)
       
        # Hard code keys to define the order
        keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots',
        'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS']
        
        outFileName = 'output/' +  argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        
        with open(outFileName, 'wb') as csvFile:
            # write the TSV with the hard-coded column order; keys missing
            # from a row are left blank by DictWriter
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        #tool_client.upload_file(outFileName, history['id'], file_type='tsv')
        
        return return_collection
    else:  
        pathwayAnalysisWorkflow = workflow_client.show_workflow('e85a3be143d5905b')
        
        params = dict()
        for key in pathwayAnalysisWorkflow['steps'].keys():
            params[key] = argDictionary
            
       
        if argDictionary['species'] == "Mouse":  
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="mouseStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="MouseGeneLengths.tab")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-mouse.txt")
            
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
        else:
        
            network=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="humanStringNetwork")
            geneLengths=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="geneLengths")
            homology=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="Homology.mouse.txt")
            secretedReference=getLibraryToolDataID(history=history,history_client=history_client,library_client=library_client,name="uniprot-secreted-human.txt")
            pathwayDatamap = {'4' : {'id':  secretedReference, 'src': 'hda'},'3' : {'id': homology, 'src': 'hda'},'2' : {'id': network, 'src': 'hda'},'1' : {'id': geneLengths, 'src': 'hda'}}
    
        diffExpDataCollection = getDatasetsByName('cutTable.tabular', history, history_client)
        for index, diffExpData in enumerate(diffExpDataCollection):

            # the pathway workflow is expected to add 14 datasets to the
            # history; used below as the completion target
            numCompleted = getNumberComplete(history['id'], history_client) + 14
            print(numCompleted)

            pathwayDatamap["0"] = {'id': diffExpData['id'], 'src': 'hda'}
            workflow_client.invoke_workflow(pathwayAnalysisWorkflow['id'],
                                            inputs=pathwayDatamap,
                                            history_id=history['id'],
                                            params=params)
            comparisonDict = getRowFromCsv(comparisons, index)
            
            if 'Factor1' in comparisonDict.keys():
                comparisonDict['Factor'] = comparisonDict['Factor1'] + "." + comparisonDict['Factor2']
                
            return_dict = {'accessionNo': argDictionary['accessionNumber'],
                           'factor': comparisonDict['Factor'],
                           'comparisonNum': comparisonDict['Numerator'],
                           'comparisonDenom': comparisonDict['Denominator'],
                           'foldChange': getUrl(diffExpData['id']),
                           # pathwayDatamap slot '2' is the STRING network and
                           # slot '1' the gene-length table (see above)
                           'interactome': pathwayDatamap['2']['id'],
                           'exonLength': pathwayDatamap['1']['id']}
            
            while getNumberComplete(history['id'], history_client) < numCompleted:
                time.sleep(10)
    
            return_dict['moduleNodes'] = getUrl(getMostRecentDatasetByName('moduleNodes.text', 
                history, history_client)['id'])
            return_dict['modulePlots'] = getUrl(getMostRecentDatasetByName('modulePlots.pdf',
                history, history_client)['id'])
            return_dict['pathways'] = getUrl(getMostRecentDatasetByName('pathways.tabular', 
                history, history_client)['id'])
            return_dict['enrichPlot'] = getUrl(getMostRecentDatasetByName('enrichmentPlot.png', 
                history, history_client)['id'])
            return_dict['enrichmentTable'] = getUrl(getMostRecentDatasetByName('TF_EnrichmentTable.tabular', 
                history, history_client)['id'])
            return_dict['slimEnrichPathways'] = getUrl(getMostRecentDatasetByName('slimEnrichmentPathways.tabular',
                history, history_client)['id'])
            return_dict['secretedProteins'] = getUrl(getMostRecentDatasetByName('secretedProteins.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsReverse'] = getUrl(getMostRecentDatasetByName('enrichedDrugsReverse.tabular',
                history, history_client)['id'])
            return_dict['enrichedDrugsMimic'] = getUrl(getMostRecentDatasetByName('enrichedDrugsMimic.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms'] = getUrl(getMostRecentDatasetByName('enrichedTerms.tabular',
                history, history_client)['id'])
            return_dict['enrichedTerms.reduced'] = getUrl(getMostRecentDatasetByName('enrichedTerms.reduced.tabular',
                history, history_client)['id'])
            return_dict['GO.MDS'] = getUrl(getMostRecentDatasetByName('GO.MDS.html',
                history, history_client)['id'])
            return_collection.append(return_dict)
       
        # Hard code keys to define the order
        keys = ['accessionNo','multiQC','factor','PCA','chrDirTable','comparisonNum','comparisonDenom','foldChange',
        'interactome','exonLength','moduleNodes','modulePlots','pathways','enrichPlot', 'enrichmentTable',
        'slimEnrichPathways','secretedProteins','enrichedDrugsReverse','enrichedDrugsMimic','enrichedTerms','enrichedTerms.reduced','GO.MDS']
        
        outFileName = 'output/' +  argDictionary['accessionNumber'] + '-workflowOutput.tsv'
        
        with open(outFileName, 'wb') as csvFile:
            # write the TSV with the hard-coded column order; keys missing
            # from a row are left blank by DictWriter
            csvOutput = csv.DictWriter(csvFile, keys, delimiter="\t")
            csvOutput.writeheader()
            csvOutput.writerows(return_collection)
            
        
        return return_collection
Esempio n. 15
0
parser.add_argument("-a", "--apikey")
parser.add_argument("-e", "--endpoint")
parser.add_argument("-p", "--port")
parser.add_argument("-s", "--sourcedir")

args = parser.parse_args()

host = "127.0.0.1" if not args.endpoint else args.endpoint
port = "8080"
addr = host + ":{}".format(port) if port else ""

apik = args.apikey

gi = GalaxyInstance(addr, apik)
lc = LibraryClient(gi)
fc = FoldersClient(gi)
hc = HistoryClient(gi)

library_name = "GDC Files"
library_description = "A library of files acquired from the NCI Genomic Data Commons (GDC)"
libs = lc.get_libraries()
lib = {}

if libs and isinstance(libs, dict):
    libs = [libs]
if libs:
    for _lib in libs:
        if "name" in _lib and _lib["name"] == library_name:
            lib = _lib
else: