def executeParameters(species, array_type, force, genomic_build,
                      update_uniprot, update_ensembl,
                      update_probeset_to_ensembl, update_domain, update_miRs,
                      update_all, update_miR_seq, ensembl_version):
    """Run the database build steps selected by the update flags.

    Every ``update_*`` argument is compared against the string 'yes'; any
    other value leaves the corresponding step switched off.

    Args:
        species: Species code; 'Mm', 'Hs' and 'Rn' are tested for explicitly.
        array_type: Platform name ('AltMouse', 'junction', 'RNASeq', 'exon',
            'gene', ...). May be given as 'type|subtype'; the part after the
            '|' is then kept as the specific array type.
        force: Passed through unchanged to the build and alignment helpers.
        genomic_build: Passed to the AltMouse and junction annotation
            builders.
        update_uniprot: 'yes' to call buildUniProtFunctAnnotations.
        update_ensembl: 'yes' to run the EnsemblSQL build calls.
        update_probeset_to_ensembl: 'yes' to build probeset/exon annotations.
        update_domain: 'yes' to run the isoform and domain alignment steps.
        update_miRs: 'yes' to run the microRNA steps.
        update_all: 'yes' switches on the five flags above (update_miR_seq is
            left as passed).
        update_miR_seq: Only read when update_miRs is 'yes'; 'yes' runs
            MatchMiRTargetPredictions first.
        ensembl_version: Passed to the EnsemblSQL calls.

    Returns:
        None. An IOError in the junction / RNASeq post-processing prints a
        message and ends the process through sys.exit().
    """
    # NOTE(review): the file, as seen here, contains a second
    # ``def executeParameters`` further down. Being later, it rebinds the name
    # when the module loads, and it has no CoordinateBasedMatching repeat.
    # Confirm which version is meant to be live.

    # Distinguish between array sub-types, like the HJAY and hGlue.
    if '|' in array_type:
        array_type, specific_array_type = array_type.split('|')
    else:
        specific_array_type = array_type

    # Neither species nor array_type is rebound below, so name the recurring
    # platform tests once.
    is_altmouse = species == 'Mm' and array_type == 'AltMouse'
    is_junction_or_rnaseq = array_type in ('junction', 'RNASeq')

    if update_all == 'yes':
        update_uniprot = update_ensembl = update_probeset_to_ensembl = 'yes'
        update_domain = update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL
        reload(EnsemblSQL)
        ensembl_passes = (
            # All essential Ensembl annotations previously obtained via BioMart.
            ('Advanced', 'AltAnalyzeDBs', ''),
            # Ensembl-to-External gene associations.
            ('Basic', 'ExternalOnly', 'Uniprot/SWISSPROT'),
        )
        for configType, analysisType, externalDBName in ensembl_passes:
            EnsemblSQL.buildEnsemblRelationalTablesFromSQL(
                species, configType, analysisType, externalDBName,
                ensembl_version, force)
        # Ensembl full gene sequence plus promoter and 3'UTRs.
        if array_type in ('AltMouse', 'junction', 'RNASeq'):
            EnsemblSQL.getFullGeneSequences(ensembl_version, species)

    if update_uniprot == 'yes':
        # Might need to delete the existing versions of downloaded databases
        # or force download.
        buildUniProtFunctAnnotations(species, force)

    if update_probeset_to_ensembl == 'yes':
        if is_altmouse:
            buildAltMouseExonAnnotations(species, array_type, force,
                                         genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species, array_type,
                                         specific_array_type, force,
                                         genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq
            data_type = 'mRNA'
            test_status = 'no'
            RNASeq.getEnsemblAssociations(species, data_type, test_status,
                                          force)
        else:
            buildExonArrayExonAnnotations(species, array_type, force)

    if update_domain == 'yes':
        run_seqcomp = 'no'
        if is_altmouse:
            # Get UCSC associations for all Ensembl linked genes (download
            # databases if necessary).
            mRNA_Type = 'mrna'
            run_from_scratch = 'yes'
            export_all_associations = 'yes'  # YES only for protein prediction analysis
            buildUCSCAnnoationFiles(species, mRNA_Type,
                                    export_all_associations, run_from_scratch,
                                    force)
            # Imports and re-exports array-Ensembl annotations; the return
            # value is not used.
            import JunctionArray
            JunctionArray.importArrayAnnotations(species, array_type)

        if is_altmouse or is_junction_or_rnaseq:
            # Performs probeset sequence alignment to Ensembl and UCSC
            # transcripts. To do: need to set up download if files missing.
            import mRNASeqAlign
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'reciprocal', force)

        import IdentifyAltIsoforms
        IdentifyAltIsoforms.runProgram(species, array_type, 'null', force,
                                       run_seqcomp)
        import FeatureAlignment
        import JunctionArray
        FeatureAlignment.findDomainsByGenomeCoordinates(species, array_type,
                                                        'null')

        if is_junction_or_rnaseq:
            # For junction probeset sequences from mRNASeqAlign(), find and
            # assess alternative proteins - export to the folder 'junction'.
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'single', force)
            IdentifyAltIsoforms.runProgram(species, array_type, 'junction',
                                           force, run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'junction')
            # For exon probesets (and junction exons) align and assess
            # alternative proteins - export to the folder 'exon'. The matching
            # FeatureAlignment pass for 'exon' is not needed.
            IdentifyAltIsoforms.runProgram(species, array_type, 'exon', force,
                                           run_seqcomp)

            # Repeat the above with CoordinateBasedMatching = True: perform
            # coordinate based junction mapping to transcripts (requires
            # certain sequence files built in IdentifyAltIsoforms).
            mRNASeqAlign.alignProbesetsToTranscripts(
                species, array_type, 'reciprocal', force,
                CoordinateBasedMatching=True)
            IdentifyAltIsoforms.runProgram(species, array_type, 'null', force,
                                           run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'null')
            mRNASeqAlign.alignProbesetsToTranscripts(
                species, array_type, 'single', force,
                CoordinateBasedMatching=True)
            IdentifyAltIsoforms.runProgram(species, array_type, 'junction',
                                           force, run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'junction')
            IdentifyAltIsoforms.runProgram(species, array_type, 'exon', force,
                                           run_seqcomp)

        if array_type == 'RNASeq':
            JunctionArray.combineExonJunctionAnnotations(species, array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions
            only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(
                species, force, only_add_sequence_to_previous_results)

        mir_source = 'multiple'
        if array_type in ('exon', 'gene'):
            import ExonSeqModule
            process_microRNA_predictions = 'yes'
            for stringency in ('strict', 'lax'):
                ExonSeqModule.runProgram(species, array_type,
                                         process_microRNA_predictions,
                                         mir_source, stringency)
            # Export metaprobesets for this build.
            ExonArray.exportMetaProbesets(array_type, species)
        else:
            import JunctionSeqModule
            for stringency in ('strict', 'lax'):
                JunctionSeqModule.runProgram(species, array_type, mir_source,
                                             stringency, force)

    if array_type == 'junction':
        try:
            import JunctionArray
            import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species, array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species, array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,
                                                                array_type)
            # Export metaprobesets for this build.
            ExonArray.exportMetaProbesets(array_type, species)
        except IOError:
            print('No built junction files to analyze')
            sys.exit()

    if array_type == 'RNASeq' and species in ('Hs', 'Mm', 'Rn'):
        import JunctionArray
        import JunctionArrayEnsemblRules
        try:
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,
                                                                array_type)
        except IOError:
            print('No Ensembl_exons.txt file to analyze')
            sys.exit()

    # Best-effort copy of the combined miR target file into the ensembl
    # folder; any failure is deliberately ignored.
    try:
        mir_targets_suffix = species + '/SequenceData/miRBS-combined_gene-targets.txt'
        ef = filepath('AltDatabase/' + mir_targets_suffix)
        er = ef.replace(
            mir_targets_suffix,
            'ensembl/' + species + '/' + species + '_microRNA-Ensembl.txt')
        import shutil
        shutil.copyfile(ef, er)
    except Exception:
        pass

    if array_type != 'RNASeq':
        # Get the probeset-probe relationships from online - needed for FIRMA
        # analysis. verifyFile will force a download if the file is missing.
        platform_dir = 'AltDatabase/' + species + '/' + array_type + '/'
        filename = platform_dir + species + '_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            server_folder = 'junction/hGlue'
            verifyFile(filename, server_folder)
            verifyFile(platform_dir + 'platform.txt', server_folder)
        elif array_type != 'AltMouse':
            verifyFile(filename, array_type)

        if array_type in ('exon', 'AltMouse') and species != 'Rn':
            # Available for select exon-arrays and AltMouse only, so a
            # failure here is ignored.
            try:
                probeset_to_remove_file = (platform_dir + species +
                                           '_probes_to_remove.txt')
                verifyFile(probeset_to_remove_file, array_type)
            except Exception:
                pass
def executeParameters(species, array_type, force, genomic_build,
                      update_uniprot, update_ensembl,
                      update_probeset_to_ensembl, update_domain, update_miRs,
                      update_all, update_miR_seq, ensembl_version):
    """Run the database build steps selected by the update flags.

    Every ``update_*`` argument is compared against the string 'yes'; any
    other value leaves the corresponding step switched off.

    Args:
        species: Species code; 'Mm', 'Hs' and 'Rn' are tested for explicitly.
        array_type: Platform name ('AltMouse', 'junction', 'RNASeq', 'exon',
            'gene', ...). May be given as 'type|subtype'; the part after the
            '|' is then kept as the specific array type.
        force: Passed through unchanged to the build and alignment helpers.
        genomic_build: Passed to the AltMouse and junction annotation
            builders.
        update_uniprot: 'yes' to call buildUniProtFunctAnnotations.
        update_ensembl: 'yes' to run the EnsemblSQL build calls.
        update_probeset_to_ensembl: 'yes' to build probeset/exon annotations.
        update_domain: 'yes' to run the isoform and domain alignment steps.
        update_miRs: 'yes' to run the microRNA steps.
        update_all: 'yes' switches on the five flags above (update_miR_seq is
            left as passed).
        update_miR_seq: Only read when update_miRs is 'yes'; 'yes' runs
            MatchMiRTargetPredictions first.
        ensembl_version: Passed to the EnsemblSQL calls.

    Returns:
        None. An IOError in the junction / RNASeq post-processing prints a
        message and ends the process through sys.exit().
    """
    # NOTE(review): the file, as seen here, also contains an earlier
    # ``def executeParameters``; this later definition rebinds the name when
    # the module loads. The earlier one additionally repeats the
    # junction/RNASeq alignment with CoordinateBasedMatching=True. Confirm
    # which version is meant to be live.

    # Distinguish between array sub-types, like the HJAY and hGlue.
    if '|' in array_type:
        array_type, specific_array_type = array_type.split('|')
    else:
        specific_array_type = array_type

    # Neither species nor array_type is rebound below, so name the recurring
    # platform tests once.
    is_altmouse = species == 'Mm' and array_type == 'AltMouse'
    is_junction_or_rnaseq = array_type in ('junction', 'RNASeq')

    if update_all == 'yes':
        update_uniprot = update_ensembl = update_probeset_to_ensembl = 'yes'
        update_domain = update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL
        reload(EnsemblSQL)
        ensembl_passes = (
            # All essential Ensembl annotations previously obtained via BioMart.
            ('Advanced', 'AltAnalyzeDBs', ''),
            # Ensembl-to-External gene associations.
            ('Basic', 'ExternalOnly', 'Uniprot/SWISSPROT'),
        )
        for configType, analysisType, externalDBName in ensembl_passes:
            EnsemblSQL.buildEnsemblRelationalTablesFromSQL(
                species, configType, analysisType, externalDBName,
                ensembl_version, force)
        # Ensembl full gene sequence plus promoter and 3'UTRs.
        if array_type in ('AltMouse', 'junction', 'RNASeq'):
            EnsemblSQL.getFullGeneSequences(ensembl_version, species)

    if update_uniprot == 'yes':
        # Might need to delete the existing versions of downloaded databases
        # or force download.
        buildUniProtFunctAnnotations(species, force)

    if update_probeset_to_ensembl == 'yes':
        if is_altmouse:
            buildAltMouseExonAnnotations(species, array_type, force,
                                         genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species, array_type,
                                         specific_array_type, force,
                                         genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq
            data_type = 'mRNA'
            test_status = 'no'
            RNASeq.getEnsemblAssociations(species, data_type, test_status,
                                          force)
        else:
            buildExonArrayExonAnnotations(species, array_type, force)

    if update_domain == 'yes':
        run_seqcomp = 'no'
        if is_altmouse:
            # Get UCSC associations for all Ensembl linked genes (download
            # databases if necessary).
            mRNA_Type = 'mrna'
            run_from_scratch = 'yes'
            export_all_associations = 'yes'  # YES only for protein prediction analysis
            buildUCSCAnnoationFiles(species, mRNA_Type,
                                    export_all_associations, run_from_scratch,
                                    force)
            # Imports and re-exports array-Ensembl annotations; the return
            # value is not used.
            import JunctionArray
            JunctionArray.importArrayAnnotations(species, array_type)

        if is_altmouse or is_junction_or_rnaseq:
            # Performs probeset sequence alignment to Ensembl and UCSC
            # transcripts. To do: need to set up download if files missing.
            import mRNASeqAlign
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'reciprocal', force)

        import IdentifyAltIsoforms
        IdentifyAltIsoforms.runProgram(species, array_type, 'null', force,
                                       run_seqcomp)
        import FeatureAlignment
        FeatureAlignment.findDomainsByGenomeCoordinates(species, array_type,
                                                        'null')

        if is_junction_or_rnaseq:
            # For junction probeset sequences from mRNASeqAlign(), find and
            # assess alternative proteins - export to the folder 'junction'.
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'single', force)
            IdentifyAltIsoforms.runProgram(species, array_type, 'junction',
                                           force, run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'junction')
            # For exon probesets (and junction exons) align and assess
            # alternative proteins - export to the folder 'exon'. The matching
            # FeatureAlignment pass for 'exon' is not needed.
            IdentifyAltIsoforms.runProgram(species, array_type, 'exon', force,
                                           run_seqcomp)

        if array_type == 'RNASeq':
            import JunctionArray
            JunctionArray.combineExonJunctionAnnotations(species, array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions
            only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(
                species, force, only_add_sequence_to_previous_results)

        mir_source = 'multiple'
        if array_type in ('exon', 'gene'):
            import ExonSeqModule
            process_microRNA_predictions = 'yes'
            for stringency in ('strict', 'lax'):
                ExonSeqModule.runProgram(species, array_type,
                                         process_microRNA_predictions,
                                         mir_source, stringency)
            # Export metaprobesets for this build.
            ExonArray.exportMetaProbesets(array_type, species)
        else:
            import JunctionSeqModule
            for stringency in ('strict', 'lax'):
                JunctionSeqModule.runProgram(species, array_type, mir_source,
                                             stringency, force)

    if array_type == 'junction':
        try:
            import JunctionArray
            import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species, array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species, array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,
                                                                array_type)
            # Export metaprobesets for this build.
            ExonArray.exportMetaProbesets(array_type, species)
        except IOError:
            print('No built junction files to analyze')
            sys.exit()

    if array_type == 'RNASeq' and species in ('Hs', 'Mm', 'Rn'):
        import JunctionArray
        import JunctionArrayEnsemblRules
        try:
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,
                                                                array_type)
        except IOError:
            print('No Ensembl_exons.txt file to analyze')
            sys.exit()

    # Best-effort copy of the combined miR target file into the ensembl
    # folder; any failure is deliberately ignored.
    try:
        mir_targets_suffix = species + '/SequenceData/miRBS-combined_gene-targets.txt'
        ef = filepath('AltDatabase/' + mir_targets_suffix)
        er = ef.replace(
            mir_targets_suffix,
            'ensembl/' + species + '/' + species + '_microRNA-Ensembl.txt')
        import shutil
        shutil.copyfile(ef, er)
    except Exception:
        pass

    if array_type != 'RNASeq':
        # Get the probeset-probe relationships from online - needed for FIRMA
        # analysis. verifyFile will force a download if the file is missing.
        platform_dir = 'AltDatabase/' + species + '/' + array_type + '/'
        filename = platform_dir + species + '_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            server_folder = 'junction/hGlue'
            verifyFile(filename, server_folder)
            verifyFile(platform_dir + 'platform.txt', server_folder)
        elif array_type != 'AltMouse':
            verifyFile(filename, array_type)

        if array_type in ('exon', 'AltMouse') and species != 'Rn':
            # Available for select exon-arrays and AltMouse only, so a
            # failure here is ignored.
            try:
                probeset_to_remove_file = (platform_dir + species +
                                           '_probes_to_remove.txt')
                verifyFile(probeset_to_remove_file, array_type)
            except Exception:
                pass