Ejemplo n.º 1
0
def executeParameters(species,array_type,force,genomic_build,update_uniprot,update_ensembl,update_probeset_to_ensembl,update_domain,update_miRs,update_all,update_miR_seq,ensembl_version):
    """Run the database build steps selected by the 'yes'/'no' update flags.

    Each optional step runs only when its flag equals the string 'yes'. After
    the optional steps, platform-specific post-processing always runs:
    junction annotation clean-up, RNASeq junction annotation (Hs/Mm/Rn only),
    a best-effort copy of the combined miRNA target file into the ensembl
    folder and, for every platform except RNASeq, verification of the
    probeset support files.

    Parameters:
        species: species code; used to build 'AltDatabase/<species>/...' paths
            and compared against 'Hs', 'Mm' and 'Rn'.
        array_type: platform name ('AltMouse', 'junction', 'RNASeq', 'exon',
            'gene', ...). May be given as 'type|subtype'; the part after the
            '|' is kept as the specific array type.
        force: passed through unchanged to the build/alignment helpers.
        genomic_build: passed to the AltMouse and junction annotation builders.
        update_uniprot, update_ensembl, update_probeset_to_ensembl,
        update_domain, update_miRs: 'yes' enables the corresponding step.
        update_all: 'yes' switches on the five flags above (update_miR_seq is
            left as supplied).
        update_miR_seq: 'yes' additionally runs MatchMiRTargetPredictions; it
            is only consulted when update_miRs is 'yes'.
        ensembl_version: passed to the EnsemblSQL routines.

    Returns:
        None.

    Raises:
        SystemExit: via sys.exit() when the junction or RNASeq post-processing
            raises IOError (a message is printed first).
        ValueError: when array_type contains more than one '|'.
    """
    if '|' in array_type:
        ### To distinguish between array sub-types, like the HJAY and hGlue
        array_type, specific_array_type = array_type.split('|')
    else:
        specific_array_type = array_type

    ### Predicates that are reused by several of the steps below
    is_altmouse = (species == 'Mm' and array_type == 'AltMouse')
    is_junction_or_rnaseq = array_type in ('junction','RNASeq')

    if update_all == 'yes':
        update_uniprot = 'yes'; update_ensembl = 'yes'; update_probeset_to_ensembl = 'yes'
        update_domain = 'yes'; update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL; reload(EnsemblSQL)

        ### Used to grab all essential Ensembl annotations previously obtained via BioMart
        configType = 'Advanced'; analysisType = 'AltAnalyzeDBs'; externalDBName = ''
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species,configType,analysisType,externalDBName,ensembl_version,force)

        ### Used to grab Ensembl-to-External gene associations
        configType = 'Basic'; analysisType = 'ExternalOnly'; externalDBName = 'Uniprot/SWISSPROT'
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species,configType,analysisType,externalDBName,ensembl_version,force)

        ### Used to grab Ensembl full gene sequence plus promoter and 3'UTRs
        if array_type in ('AltMouse','junction','RNASeq'):
            EnsemblSQL.getFullGeneSequences(ensembl_version,species)

    if update_uniprot == 'yes':
        ### Might need to delete the existing versions of downloaded databases or force download
        buildUniProtFunctAnnotations(species,force)

    if update_probeset_to_ensembl == 'yes':
        if is_altmouse:
            buildAltMouseExonAnnotations(species,array_type,force,genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species,array_type,specific_array_type,force,genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq; test_status = 'no'; data_type = 'mRNA'
            RNASeq.getEnsemblAssociations(species,data_type,test_status,force)
        else:
            buildExonArrayExonAnnotations(species,array_type,force)

    if update_domain == 'yes':
        ### Get UCSC associations for all Ensembl linked genes (download databases if necessary)
        mRNA_Type = 'mrna'; run_from_scratch = 'yes'
        export_all_associations = 'yes' ### YES only for protein prediction analysis
        buildUCSCAnnoationFiles(species,mRNA_Type,export_all_associations,run_from_scratch,force)

        if is_altmouse:
            ### Imports and re-exports array-Ensembl annotations (the returned value is not needed here)
            import JunctionArray
            JunctionArray.importArrayAnnotations(species,array_type)
        if is_altmouse or is_junction_or_rnaseq:
            ### Performs probeset sequence alignment to Ensembl and UCSC transcripts. To do: Need to setup download if files missing
            import mRNASeqAlign; analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,analysis_type,force)

        import IdentifyAltIsoforms; run_seqcomp = 'no'
        IdentifyAltIsoforms.runProgram(species,array_type,'null',force,run_seqcomp)
        import FeatureAlignment; import JunctionArray
        FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'null')

        if is_junction_or_rnaseq:
            ### mRNASeqAlign is always bound here: the import above runs whenever is_junction_or_rnaseq is true
            ### For junction probeset sequences from mRNASeqAlign(), find and assess alternative proteins - export to the folder 'junction'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,'single',force)
            IdentifyAltIsoforms.runProgram(species,array_type,'junction',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'junction')
            ### For exon probesets (and junction exons) align and assess alternative proteins - export to the folder 'exon'
            IdentifyAltIsoforms.runProgram(species,array_type,'exon',force,run_seqcomp)
            ### FeatureAlignment.findDomainsByGenomeCoordinates for 'exon' is not needed

            ### Repeat the above with CoordinateBasedMatching = True
            ### Perform coordinate based junction mapping to transcripts (requires certain sequence files built in IdentifyAltIsoforms)
            analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,analysis_type,force,CoordinateBasedMatching = True)
            IdentifyAltIsoforms.runProgram(species,array_type,'null',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'null')
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,'single',force,CoordinateBasedMatching = True)
            IdentifyAltIsoforms.runProgram(species,array_type,'junction',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'junction')
            IdentifyAltIsoforms.runProgram(species,array_type,'exon',force,run_seqcomp)
            if array_type == 'RNASeq':
                JunctionArray.combineExonJunctionAnnotations(species,array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions; only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(species,force,only_add_sequence_to_previous_results)

        ### Both branches run the miRNA analysis twice: first 'strict', then 'lax'
        mir_source = 'multiple'
        if array_type in ('exon','gene'):
            import ExonSeqModule
            process_microRNA_predictions = 'yes'
            ExonSeqModule.runProgram(species,array_type,process_microRNA_predictions,mir_source,'strict')
            ExonSeqModule.runProgram(species,array_type,process_microRNA_predictions,mir_source,'lax')
            ### NOTE(review): ExonArray is not imported in this function - presumably a module-level import; confirm
            ExonArray.exportMetaProbesets(array_type,species) ### Export metaprobesets for this build
        else:
            import JunctionSeqModule
            JunctionSeqModule.runProgram(species,array_type,mir_source,'strict',force)
            JunctionSeqModule.runProgram(species,array_type,mir_source,'lax',force)

    if array_type == 'junction':
        try:
            import JunctionArray; import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species,array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species,array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,array_type)
            ExonArray.exportMetaProbesets(array_type,species) ### Export metaprobesets for this build
        except IOError:
            print('No built junction files to analyze'); sys.exit()
    if array_type == 'RNASeq' and species in ('Hs','Mm','Rn'):
        import JunctionArray; import JunctionArrayEnsemblRules
        try: JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,array_type)
        except IOError:
            print('No Ensembl_exons.txt file to analyze'); sys.exit()

    try:
        ### Copy the combined miRNA target file to <...>ensembl/<species>/<species>_microRNA-Ensembl.txt
        filename = 'AltDatabase/'+species+'/SequenceData/miRBS-combined_gene-targets.txt'; ef = filepath(filename)
        er = ef.replace(species+'/SequenceData/miRBS-combined_gene-targets.txt','ensembl/'+species+'/'+species+'_microRNA-Ensembl.txt')
        import shutil; shutil.copyfile(ef,er)
    except Exception:
        pass ### Best-effort copy: any failure here is deliberately ignored

    if array_type != 'RNASeq':
        ### Get the probeset-probe relationships from online - needed for FIRMA analysis
        filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            ### Sub-types whose name contains 'lue' (e.g. hGlue) use their own server folder
            server_folder = 'junction/hGlue'
            verifyFile(filename,server_folder) ### Will force download if missing
            verifyFile('AltDatabase/'+species+'/'+array_type+'/platform.txt',server_folder) ### Will force download if missing
        elif array_type != 'AltMouse':
            verifyFile(filename,array_type) ### Will force download if missing
        if array_type in ('exon','AltMouse') and species != 'Rn':
            try:
                ### Available for select exon-arrays and AltMouse
                probeset_to_remove_file = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_probes_to_remove.txt'
                verifyFile(probeset_to_remove_file,array_type)
            except Exception:
                pass ### Optional file: failures are deliberately ignored
Ejemplo n.º 2
0
def executeParameters(species, array_type, force, genomic_build,
                      update_uniprot, update_ensembl,
                      update_probeset_to_ensembl, update_domain, update_miRs,
                      update_all, update_miR_seq, ensembl_version):
    """Run the database build steps selected by the 'yes'/'no' update flags.

    Each optional step runs only when its flag equals the string 'yes'. After
    the optional steps, platform-specific post-processing always runs:
    junction annotation clean-up, RNASeq junction annotation (Hs/Mm/Rn only),
    a best-effort copy of the combined miRNA target file into the ensembl
    folder and, for every platform except RNASeq, verification of the
    probeset support files.

    Parameters:
        species: species code; used to build 'AltDatabase/<species>/...' paths
            and compared against 'Hs', 'Mm' and 'Rn'.
        array_type: platform name ('AltMouse', 'junction', 'RNASeq', 'exon',
            'gene', ...). May be given as 'type|subtype'; the part after the
            '|' is kept as the specific array type.
        force: passed through unchanged to the build/alignment helpers.
        genomic_build: passed to the AltMouse and junction annotation builders.
        update_uniprot, update_ensembl, update_probeset_to_ensembl,
        update_domain, update_miRs: 'yes' enables the corresponding step.
        update_all: 'yes' switches on the five flags above (update_miR_seq is
            left as supplied).
        update_miR_seq: 'yes' additionally runs MatchMiRTargetPredictions; it
            is only consulted when update_miRs is 'yes'.
        ensembl_version: passed to the EnsemblSQL routines.

    Returns:
        None.

    Raises:
        SystemExit: via sys.exit() when the junction or RNASeq post-processing
            raises IOError (a message is printed first).
        ValueError: when array_type contains more than one '|'.
    """
    if '|' in array_type:
        ### To distinguish between array sub-types, like the HJAY and hGlue
        array_type, specific_array_type = array_type.split('|')
    else:
        specific_array_type = array_type

    ### Predicates that are reused by several of the steps below
    is_altmouse = (species == 'Mm' and array_type == 'AltMouse')
    is_junction_or_rnaseq = array_type in ('junction', 'RNASeq')

    if update_all == 'yes':
        update_uniprot = 'yes'
        update_ensembl = 'yes'
        update_probeset_to_ensembl = 'yes'
        update_domain = 'yes'
        update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL
        reload(EnsemblSQL)

        ### Used to grab all essential Ensembl annotations previously obtained via BioMart
        configType = 'Advanced'
        analysisType = 'AltAnalyzeDBs'
        externalDBName = ''
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(
            species, configType, analysisType, externalDBName,
            ensembl_version, force)

        ### Used to grab Ensembl-to-External gene associations
        configType = 'Basic'
        analysisType = 'ExternalOnly'
        externalDBName = 'Uniprot/SWISSPROT'
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(
            species, configType, analysisType, externalDBName,
            ensembl_version, force)

        ### Used to grab Ensembl full gene sequence plus promoter and 3'UTRs
        if array_type in ('AltMouse', 'junction', 'RNASeq'):
            EnsemblSQL.getFullGeneSequences(ensembl_version, species)

    if update_uniprot == 'yes':
        ### Might need to delete the existing versions of downloaded databases or force download
        buildUniProtFunctAnnotations(species, force)

    if update_probeset_to_ensembl == 'yes':
        if is_altmouse:
            buildAltMouseExonAnnotations(species, array_type, force,
                                         genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species, array_type,
                                         specific_array_type, force,
                                         genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq
            test_status = 'no'
            data_type = 'mRNA'
            RNASeq.getEnsemblAssociations(species, data_type, test_status,
                                          force)
        else:
            buildExonArrayExonAnnotations(species, array_type, force)

    if update_domain == 'yes':
        ### Get UCSC associations for all Ensembl linked genes (download databases if necessary)
        mRNA_Type = 'mrna'
        run_from_scratch = 'yes'
        export_all_associations = 'yes'  ### YES only for protein prediction analysis
        buildUCSCAnnoationFiles(species, mRNA_Type, export_all_associations,
                                run_from_scratch, force)

        if is_altmouse:
            ### Imports and re-exports array-Ensembl annotations (the returned value is not needed here)
            import JunctionArray
            JunctionArray.importArrayAnnotations(species, array_type)
        if is_altmouse or is_junction_or_rnaseq:
            ### Performs probeset sequence alignment to Ensembl and UCSC transcripts. To do: Need to setup download if files missing
            import mRNASeqAlign
            analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     analysis_type, force)

        import IdentifyAltIsoforms
        run_seqcomp = 'no'
        IdentifyAltIsoforms.runProgram(species, array_type, 'null', force,
                                       run_seqcomp)
        import FeatureAlignment
        FeatureAlignment.findDomainsByGenomeCoordinates(
            species, array_type, 'null')

        if is_junction_or_rnaseq:
            ### mRNASeqAlign is always bound here: the import above runs whenever is_junction_or_rnaseq is true
            ### For junction probeset sequences from mRNASeqAlign(), find and assess alternative proteins - export to the folder 'junction'
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'single', force)
            IdentifyAltIsoforms.runProgram(species, array_type, 'junction',
                                           force, run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'junction')
            ### For exon probesets (and junction exons) align and assess alternative proteins - export to the folder 'exon'
            IdentifyAltIsoforms.runProgram(species, array_type, 'exon',
                                           force, run_seqcomp)
            ### FeatureAlignment.findDomainsByGenomeCoordinates for 'exon' is not needed
            if array_type == 'RNASeq':
                import JunctionArray
                JunctionArray.combineExonJunctionAnnotations(
                    species, array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions
            only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(
                species, force, only_add_sequence_to_previous_results)

        ### Both branches run the miRNA analysis twice: first 'strict', then 'lax'
        mir_source = 'multiple'
        if array_type in ('exon', 'gene'):
            import ExonSeqModule
            process_microRNA_predictions = 'yes'
            ExonSeqModule.runProgram(species, array_type,
                                     process_microRNA_predictions, mir_source,
                                     'strict')
            ExonSeqModule.runProgram(species, array_type,
                                     process_microRNA_predictions, mir_source,
                                     'lax')
            ### NOTE(review): ExonArray is not imported in this function - presumably a module-level import; confirm
            ExonArray.exportMetaProbesets(
                array_type, species)  ### Export metaprobesets for this build
        else:
            import JunctionSeqModule
            JunctionSeqModule.runProgram(species, array_type, mir_source,
                                         'strict', force)
            JunctionSeqModule.runProgram(species, array_type, mir_source,
                                         'lax', force)

    if array_type == 'junction':
        try:
            import JunctionArray
            import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species, array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species, array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(
                species, array_type)
            ExonArray.exportMetaProbesets(
                array_type, species)  ### Export metaprobesets for this build
        except IOError:
            print('No built junction files to analyze')
            sys.exit()
    if array_type == 'RNASeq' and species in ('Hs', 'Mm', 'Rn'):
        import JunctionArray
        import JunctionArrayEnsemblRules
        try:
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(
                species, array_type)
        except IOError:
            print('No Ensembl_exons.txt file to analyze')
            sys.exit()

    try:
        ### Copy the combined miRNA target file to <...>ensembl/<species>/<species>_microRNA-Ensembl.txt
        filename = 'AltDatabase/' + species + '/SequenceData/miRBS-combined_gene-targets.txt'
        ef = filepath(filename)
        er = ef.replace(
            species + '/SequenceData/miRBS-combined_gene-targets.txt',
            'ensembl/' + species + '/' + species + '_microRNA-Ensembl.txt')
        import shutil
        shutil.copyfile(ef, er)
    except Exception:
        pass  ### Best-effort copy: any failure here is deliberately ignored

    if array_type != 'RNASeq':
        ### Get the probeset-probe relationships from online - needed for FIRMA analysis
        filename = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            ### Sub-types whose name contains 'lue' (e.g. hGlue) use their own server folder
            server_folder = 'junction/hGlue'
            verifyFile(filename,
                       server_folder)  ### Will force download if missing
            verifyFile('AltDatabase/' + species + '/' + array_type +
                       '/platform.txt',
                       server_folder)  ### Will force download if missing
        elif array_type != 'AltMouse':
            verifyFile(filename,
                       array_type)  ### Will force download if missing
        if array_type in ('exon', 'AltMouse') and species != 'Rn':
            try:
                ### Available for select exon-arrays and AltMouse
                probeset_to_remove_file = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_probes_to_remove.txt'
                verifyFile(probeset_to_remove_file, array_type)
            except Exception:
                pass  ### Optional file: failures are deliberately ignored