def temp(array_type, species):
    """Export metaprobesets, then verify the platform's probe-level files.

    The specific array type is hard-coded to 'hGlue' in this helper.

    Args:
        array_type: Platform name; 'RNASeq', 'junction', 'AltMouse' and
            'exon' are handled explicitly.
        species: Species code used to build the AltDatabase paths; the
            probes-to-remove file is not requested for 'Rn'.
    """
    specific_array_type = 'hGlue'
    ExonArray.exportMetaProbesets(array_type, species)  ### Export metaprobesets for this build

    # Nothing else is verified for RNASeq.
    if array_type == 'RNASeq':
        return

    ### Get the probeset-probe relationships from online - needed for FIRMA analysis
    platform_dir = 'AltDatabase/' + species + '/' + array_type + '/'
    probeset_probes_file = platform_dir + species + '_probeset-probes.txt'

    is_glue_junction = array_type == 'junction' and 'lue' in specific_array_type
    if is_glue_junction:
        server_folder = 'junction/hGlue'
        verifyFile(probeset_probes_file, server_folder)  ### Will force download if missing
        verifyFile(platform_dir + 'platform.txt', server_folder)  ### Will force download if missing
    elif array_type != 'AltMouse':
        verifyFile(probeset_probes_file, array_type)  ### Will force download if missing

    wants_removal_list = (array_type == 'exon' or array_type == 'AltMouse')
    if wants_removal_list and species != 'Rn':
        ### Available for select exon-arrays and AltMouse
        try:
            verifyFile(platform_dir + species + '_probes_to_remove.txt', array_type)
        except Exception:
            # The file is optional, so a failed lookup is ignored on purpose.
            pass
def temp(array_type,species):
    """Export metaprobesets and check the probe-level support files.

    Uses a fixed specific array type of 'hGlue'. For every platform except
    'RNASeq' the probeset-probes file is verified (junction arrays also
    verify platform.txt, AltMouse verifies neither), and exon/AltMouse builds
    for species other than 'Rn' additionally try the probes-to-remove file.
    """
    specific_array_type = 'hGlue'
    ExonArray.exportMetaProbesets(array_type, species)  ### Export metaprobesets for this build
    if array_type != 'RNASeq':
        ### Get the probeset-probe relationships from online - needed for FIRMA analysis
        base = 'AltDatabase/' + species + '/' + array_type + '/'
        relationships_file = base + species + '_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            # Both files come from the hGlue server folder.
            for required_file in (relationships_file, base + 'platform.txt'):
                verifyFile(required_file, 'junction/hGlue')  ### Will force download if missing
        elif array_type != 'AltMouse':
            verifyFile(relationships_file, array_type)  ### Will force download if missing
        if array_type in ('exon', 'AltMouse') and species != 'Rn':
            try:
                ### Available for select exon-arrays and AltMouse
                verifyFile(base + species + '_probes_to_remove.txt', array_type)
            except Exception:
                # Best effort only: errors from this optional file are ignored.
                pass
def executeParameters(species,array_type,force,genomic_build,update_uniprot,update_ensembl,update_probeset_to_ensembl,update_domain,update_miRs,update_all,update_miR_seq,ensembl_version):
    """Run the database build steps selected by the ``update_*`` flags.

    Every flag is compared against the string 'yes'. Steps run in this order:
    Ensembl tables (``update_ensembl``), UniProt annotations
    (``update_uniprot``), platform-to-Ensembl annotations
    (``update_probeset_to_ensembl``), transcript alignment followed by the
    IdentifyAltIsoforms / FeatureAlignment passes (``update_domain``) and the
    microRNA steps (``update_miRs``). After the flagged steps, the junction /
    RNASeq post-processing calls run, the combined miR target file is copied
    (best effort) and the probe-level support files are verified.

    Args:
        species: Species code used in paths and comparisons ('Mm', 'Hs' and
            'Rn' are tested explicitly).
        array_type: Platform name ('AltMouse', 'junction', 'RNASeq', 'exon'
            and 'gene' are tested explicitly). May be given as
            'type|subtype'; the part after '|' becomes the specific array
            type.
        force: Forwarded unchanged to the builder functions.
        genomic_build: Forwarded to the AltMouse and junction annotation
            builders.
        update_uniprot: 'yes' enables the UniProt step.
        update_ensembl: 'yes' enables the Ensembl step.
        update_probeset_to_ensembl: 'yes' enables the platform-to-Ensembl step.
        update_domain: 'yes' enables the alignment / isoform / domain step.
        update_miRs: 'yes' enables the microRNA step.
        update_all: 'yes' enables the five steps above (it does not set
            ``update_miR_seq``).
        update_miR_seq: 'yes' additionally runs MatchMiRTargetPredictions;
            only read when ``update_miRs`` is 'yes'.
        ensembl_version: Forwarded to the EnsemblSQL calls.

    Note:
        Prints a message and calls ``sys.exit()`` when the junction or RNASeq
        exon-annotation call raises IOError.
    """
    if '|' in array_type: array_type, specific_array_type = string.split(array_type,'|') ### To distinguish between array sub-types, like the HJAY and hGlue
    else: specific_array_type = array_type

    ### update_all switches on every step below except update_miR_seq
    if update_all == 'yes':
        update_uniprot='yes'; update_ensembl='yes'; update_probeset_to_ensembl='yes'; update_domain='yes'; update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL; reload(EnsemblSQL)

        """ Used to grab all essential Ensembl annotations previously obtained via BioMart"""
        configType = 'Advanced'; analysisType = 'AltAnalyzeDBs'; externalDBName = ''
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species,configType,analysisType,externalDBName,ensembl_version,force)

        """ Used to grab Ensembl-to-External gene associations"""
        configType = 'Basic'; analysisType = 'ExternalOnly'; externalDBName = 'Uniprot/SWISSPROT'
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species,configType,analysisType,externalDBName,ensembl_version,force)

        """ Used to grab Ensembl full gene sequence plus promoter and 3'UTRs """
        if array_type == 'AltMouse' or array_type == 'junction' or array_type == 'RNASeq':
            EnsemblSQL.getFullGeneSequences(ensembl_version,species)

    if update_uniprot == 'yes':
        ###Might need to delete the existing versions of downloaded databases or force download
        buildUniProtFunctAnnotations(species,force)

    ### Exactly one platform-specific annotation builder runs
    if update_probeset_to_ensembl == 'yes':
        if species == 'Mm' and array_type == 'AltMouse':
            buildAltMouseExonAnnotations(species,array_type,force,genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species,array_type,specific_array_type,force,genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq; test_status = 'no'; data_type = 'mRNA'
            RNASeq.getEnsemblAssociations(species,data_type,test_status,force)
        else: buildExonArrayExonAnnotations(species,array_type,force)

    if update_domain == 'yes':
        ### Get UCSC associations for all Ensembl linked genes (download databases if necessary)
        if species == 'Mm' and array_type == 'AltMouse':
            mRNA_Type = 'mrna'; run_from_scratch = 'yes'
            export_all_associations = 'yes' ### YES only for protein prediction analysis
            buildUCSCAnnoationFiles(species,mRNA_Type,export_all_associations,run_from_scratch,force)

        if (species == 'Mm' and array_type == 'AltMouse'):
            """Imports and re-exports array-Ensembl annotations"""
            import JunctionArray
            ### The return value is discarded; the call is made for its side effects
            null = JunctionArray.importArrayAnnotations(species,array_type); null={}
        if (species == 'Mm' and array_type == 'AltMouse') or array_type == 'junction' or array_type == 'RNASeq':
            """Performs probeset sequence aligment to Ensembl and UCSC transcripts. To do: Need to setup download if files missing"""
            import mRNASeqAlign; analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,analysis_type,force)

        ### NOTE(review): nesting reconstructed - the 'null' pass below is assumed to run for every platform; confirm
        import IdentifyAltIsoforms; run_seqcomp = 'no'
        IdentifyAltIsoforms.runProgram(species,array_type,'null',force,run_seqcomp)
        import FeatureAlignment; import JunctionArray
        FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'null')

        if array_type == 'junction' or array_type == 'RNASeq':
            ### For junction probeset sequences from mRNASeqAlign(), find and assess alternative proteins - export to the folder 'junction'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,'single',force)
            IdentifyAltIsoforms.runProgram(species,array_type,'junction',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'junction')
            ### For exon probesets (and junction exons) align and assess alternative proteins - export to the folder 'exon'
            IdentifyAltIsoforms.runProgram(species,array_type,'exon',force,run_seqcomp)
            # FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'exon') # not needed

            """ Repeat above with CoordinateBasedMatching = True """
            ### Perform coordinate based junction mapping to transcripts (requires certain sequence files built in IdentifyAltIsoforms)
            analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,analysis_type,force,CoordinateBasedMatching = True)
            IdentifyAltIsoforms.runProgram(species,array_type,'null',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'null')
            mRNASeqAlign.alignProbesetsToTranscripts(species,array_type,'single',force,CoordinateBasedMatching = True)
            IdentifyAltIsoforms.runProgram(species,array_type,'junction',force,run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'junction')
            IdentifyAltIsoforms.runProgram(species,array_type,'exon',force,run_seqcomp)

        if array_type == 'RNASeq':
            JunctionArray.combineExonJunctionAnnotations(species,array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions; only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(species,force,only_add_sequence_to_previous_results)

        ### Each module is run twice: first with 'strict', then with 'lax' stringency
        if array_type == 'exon' or array_type == 'gene':
            import ExonSeqModule
            stringency = 'strict'; process_microRNA_predictions = 'yes'; mir_source = 'multiple'
            ExonSeqModule.runProgram(species,array_type,process_microRNA_predictions,mir_source,stringency)
            stringency = 'lax'
            ExonSeqModule.runProgram(species,array_type,process_microRNA_predictions,mir_source,stringency)
            ExonArray.exportMetaProbesets(array_type,species) ### Export metaprobesets for this build
        else:
            import JunctionSeqModule
            stringency = 'strict'; mir_source = 'multiple'
            JunctionSeqModule.runProgram(species,array_type,mir_source,stringency,force)
            stringency = 'lax'
            JunctionSeqModule.runProgram(species,array_type,mir_source,stringency,force)

    ### NOTE(review): nesting reconstructed - everything from here on is assumed to run regardless of the update_* flags; confirm
    if array_type == 'junction':
        try:
            import JunctionArray; import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species,array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species,array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,array_type)
            ExonArray.exportMetaProbesets(array_type,species) ### Export metaprobesets for this build
        except IOError: print 'No built junction files to analyze';sys.exit()
    if array_type == 'RNASeq' and (species == 'Hs' or species == 'Mm' or species == 'Rn'):
        import JunctionArray; import JunctionArrayEnsemblRules
        try: JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(species,array_type)
        except IOError: print 'No Ensembl_exons.txt file to analyze';sys.exit()

    ### Best-effort copy of the combined miR target file to ensembl/<species>/<species>_microRNA-Ensembl.txt;
    ### any exception raised here is silently ignored
    try:
        filename = 'AltDatabase/'+species+'/SequenceData/miRBS-combined_gene-targets.txt'; ef=filepath(filename)
        er = string.replace(ef,species+'/SequenceData/miRBS-combined_gene-targets.txt','ensembl/'+species+'/'+species+'_microRNA-Ensembl.txt')
        import shutil; shutil.copyfile(ef,er)
    except Exception: null=[]

    if array_type != 'RNASeq':
        ### Get the probeset-probe relationships from online - needed for FIRMA analysis
        filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            server_folder = 'junction/hGlue'
            verifyFile(filename,server_folder) ### Will force download if missing
            verifyFile('AltDatabase/'+species+'/'+array_type+'/platform.txt',server_folder) ### Will force download if missing
        elif array_type != 'AltMouse': verifyFile(filename,array_type) ### Will force download if missing
        if (array_type == 'exon' or array_type == 'AltMouse') and species != 'Rn':
            try:
                ### Available for select exon-arrays and AltMouse
                probeset_to_remove_file = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_probes_to_remove.txt'
                verifyFile(probeset_to_remove_file,array_type)
            except Exception: null=[]
def executeParameters(species, array_type, force, genomic_build,
                      update_uniprot, update_ensembl,
                      update_probeset_to_ensembl, update_domain, update_miRs,
                      update_all, update_miR_seq, ensembl_version):
    """Run the database build steps selected by the ``update_*`` flags.

    Every flag is compared against the string 'yes'. Steps run in this order:
    Ensembl tables (``update_ensembl``), UniProt annotations
    (``update_uniprot``), platform-to-Ensembl annotations
    (``update_probeset_to_ensembl``), transcript alignment followed by the
    IdentifyAltIsoforms / FeatureAlignment passes (``update_domain``) and the
    microRNA steps (``update_miRs``). After the flagged steps, the junction /
    RNASeq post-processing calls run, the combined miR target file is copied
    (best effort) and the probe-level support files are verified.

    Args:
        species: Species code used in paths and comparisons ('Mm', 'Hs' and
            'Rn' are tested explicitly).
        array_type: Platform name ('AltMouse', 'junction', 'RNASeq', 'exon'
            and 'gene' are tested explicitly). May be given as
            'type|subtype'; the part after '|' becomes the specific array
            type.
        force: Forwarded unchanged to the builder functions.
        genomic_build: Forwarded to the AltMouse and junction annotation
            builders.
        update_uniprot: 'yes' enables the UniProt step.
        update_ensembl: 'yes' enables the Ensembl step.
        update_probeset_to_ensembl: 'yes' enables the platform-to-Ensembl step.
        update_domain: 'yes' enables the alignment / isoform / domain step.
        update_miRs: 'yes' enables the microRNA step.
        update_all: 'yes' enables the five steps above (it does not set
            ``update_miR_seq``).
        update_miR_seq: 'yes' additionally runs MatchMiRTargetPredictions;
            only read when ``update_miRs`` is 'yes'.
        ensembl_version: Forwarded to the EnsemblSQL calls.

    Note:
        Prints a message and calls ``sys.exit()`` when the junction or RNASeq
        exon-annotation call raises IOError.
    """
    if '|' in array_type:
        array_type, specific_array_type = string.split(
            array_type, '|'
        )  ### To distinguish between array sub-types, like the HJAY and hGlue
    else:
        specific_array_type = array_type

    ### update_all switches on every step below except update_miR_seq
    if update_all == 'yes':
        update_uniprot = 'yes'
        update_ensembl = 'yes'
        update_probeset_to_ensembl = 'yes'
        update_domain = 'yes'
        update_miRs = 'yes'

    if update_ensembl == 'yes':
        import EnsemblSQL
        reload(EnsemblSQL)
        """ Used to grab all essential Ensembl annotations previously obtained via BioMart"""
        configType = 'Advanced'
        analysisType = 'AltAnalyzeDBs'
        externalDBName = ''
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species, configType,
                                                       analysisType,
                                                       externalDBName,
                                                       ensembl_version, force)
        """ Used to grab Ensembl-to-External gene associations"""
        configType = 'Basic'
        analysisType = 'ExternalOnly'
        externalDBName = 'Uniprot/SWISSPROT'
        EnsemblSQL.buildEnsemblRelationalTablesFromSQL(species, configType,
                                                       analysisType,
                                                       externalDBName,
                                                       ensembl_version, force)
        """ Used to grab Ensembl full gene sequence plus promoter and 3'UTRs """
        if array_type == 'AltMouse' or array_type == 'junction' or array_type == 'RNASeq':
            EnsemblSQL.getFullGeneSequences(ensembl_version, species)

    if update_uniprot == 'yes':
        ###Might need to delete the existing versions of downloaded databases or force download
        buildUniProtFunctAnnotations(species, force)

    ### Exactly one platform-specific annotation builder runs
    if update_probeset_to_ensembl == 'yes':
        if species == 'Mm' and array_type == 'AltMouse':
            buildAltMouseExonAnnotations(species, array_type, force,
                                         genomic_build)
        elif array_type == 'junction':
            buildJunctionExonAnnotations(species, array_type,
                                         specific_array_type, force,
                                         genomic_build)
        elif array_type == 'RNASeq':
            import RNASeq
            test_status = 'no'
            data_type = 'mRNA'
            RNASeq.getEnsemblAssociations(species, data_type, test_status,
                                          force)
        else:
            buildExonArrayExonAnnotations(species, array_type, force)

    if update_domain == 'yes':
        ### Get UCSC associations for all Ensembl linked genes (download databases if necessary)
        if species == 'Mm' and array_type == 'AltMouse':
            mRNA_Type = 'mrna'
            run_from_scratch = 'yes'
            export_all_associations = 'yes'  ### YES only for protein prediction analysis
            buildUCSCAnnoationFiles(species, mRNA_Type,
                                    export_all_associations, run_from_scratch,
                                    force)
        if (species == 'Mm' and array_type == 'AltMouse'):
            """Imports and re-exports array-Ensembl annotations"""
            import JunctionArray
            ### The return value is discarded; the call is made for its side effects
            null = JunctionArray.importArrayAnnotations(species, array_type)
            null = {}
        if (species == 'Mm' and array_type == 'AltMouse'
            ) or array_type == 'junction' or array_type == 'RNASeq':
            """Performs probeset sequence aligment to Ensembl and UCSC transcripts. To do: Need to setup download if files missing"""
            import mRNASeqAlign
            analysis_type = 'reciprocal'
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     analysis_type, force)

        ### NOTE(review): nesting reconstructed - the 'null' pass below is assumed to run for every platform; confirm
        import IdentifyAltIsoforms
        run_seqcomp = 'no'
        IdentifyAltIsoforms.runProgram(species, array_type, 'null', force,
                                       run_seqcomp)
        import FeatureAlignment
        FeatureAlignment.findDomainsByGenomeCoordinates(
            species, array_type, 'null')

        if array_type == 'junction' or array_type == 'RNASeq':
            ### For junction probeset sequences from mRNASeqAlign(), find and assess alternative proteins - export to the folder 'junction'
            mRNASeqAlign.alignProbesetsToTranscripts(species, array_type,
                                                     'single', force)
            IdentifyAltIsoforms.runProgram(species, array_type, 'junction',
                                           force, run_seqcomp)
            FeatureAlignment.findDomainsByGenomeCoordinates(
                species, array_type, 'junction')
        if array_type == 'junction' or array_type == 'RNASeq':
            ### For exon probesets (and junction exons) align and assess alternative proteins - export to the folder 'exon'
            IdentifyAltIsoforms.runProgram(species, array_type, 'exon', force,
                                           run_seqcomp)
            # FeatureAlignment.findDomainsByGenomeCoordinates(species,array_type,'exon') # not needed
        if array_type == 'RNASeq':
            import JunctionArray
            JunctionArray.combineExonJunctionAnnotations(
                species, array_type)

    if update_miRs == 'yes':
        if update_miR_seq == 'yes':
            import MatchMiRTargetPredictions
            only_add_sequence_to_previous_results = 'no'
            MatchMiRTargetPredictions.runProgram(
                species, force, only_add_sequence_to_previous_results)

        ### Each module is run twice: first with 'strict', then with 'lax' stringency
        if array_type == 'exon' or array_type == 'gene':
            import ExonSeqModule
            stringency = 'strict'
            process_microRNA_predictions = 'yes'
            mir_source = 'multiple'
            ExonSeqModule.runProgram(species, array_type,
                                     process_microRNA_predictions, mir_source,
                                     stringency)
            stringency = 'lax'
            ExonSeqModule.runProgram(species, array_type,
                                     process_microRNA_predictions, mir_source,
                                     stringency)
            ExonArray.exportMetaProbesets(
                array_type, species)  ### Export metaprobesets for this build
        else:
            import JunctionSeqModule
            stringency = 'strict'
            mir_source = 'multiple'
            JunctionSeqModule.runProgram(species, array_type, mir_source,
                                         stringency, force)
            stringency = 'lax'
            JunctionSeqModule.runProgram(species, array_type, mir_source,
                                         stringency, force)

    ### NOTE(review): nesting reconstructed - everything from here on is assumed to run regardless of the update_* flags; confirm
    if array_type == 'junction':
        try:
            import JunctionArray
            import JunctionArrayEnsemblRules
            JunctionArray.filterForCriticalExons(species, array_type)
            JunctionArray.overRideExonEntriesWithJunctions(species, array_type)
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(
                species, array_type)
            ExonArray.exportMetaProbesets(
                array_type, species)  ### Export metaprobesets for this build
        except IOError:
            print 'No built junction files to analyze'
            sys.exit()
    if array_type == 'RNASeq' and (species == 'Hs' or species == 'Mm' or species == 'Rn'):
        import JunctionArray
        import JunctionArrayEnsemblRules
        try:
            JunctionArrayEnsemblRules.annotateJunctionIDsAsExon(
                species, array_type)
        except IOError:
            print 'No Ensembl_exons.txt file to analyze'
            sys.exit()

    ### Best-effort copy of the combined miR target file to ensembl/<species>/<species>_microRNA-Ensembl.txt;
    ### any exception raised here is silently ignored
    try:
        filename = 'AltDatabase/' + species + '/SequenceData/miRBS-combined_gene-targets.txt'
        ef = filepath(filename)
        er = string.replace(
            ef, species + '/SequenceData/miRBS-combined_gene-targets.txt',
            'ensembl/' + species + '/' + species + '_microRNA-Ensembl.txt')
        import shutil
        shutil.copyfile(ef, er)
    except Exception:
        null = []

    if array_type != 'RNASeq':
        ### Get the probeset-probe relationships from online - needed for FIRMA analysis
        filename = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_probeset-probes.txt'
        if array_type == 'junction' and 'lue' in specific_array_type:
            server_folder = 'junction/hGlue'
            verifyFile(filename, server_folder)  ### Will force download if missing
            verifyFile('AltDatabase/' + species + '/' + array_type + '/platform.txt', server_folder)  ### Will force download if missing
        elif array_type != 'AltMouse':
            verifyFile(filename, array_type)  ### Will force download if missing
        if (array_type == 'exon' or array_type == 'AltMouse') and species != 'Rn':
            try:
                ### Available for select exon-arrays and AltMouse
                probeset_to_remove_file = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_probes_to_remove.txt'
                verifyFile(probeset_to_remove_file, array_type)
            except Exception:
                null = []
def probesetSummarize(exp_file_location_db,analyze_metaprobesets,probeset_type,species,root): for dataset in exp_file_location_db: ### Instance of the Class ExpressionFileLocationData fl = exp_file_location_db[dataset] apt_dir =fl.APTLocation() array_type=fl.ArrayType() pgf_file=fl.InputCDFFile() clf_file=fl.CLFFile() bgp_file=fl.BGPFile() xhyb_remove = fl.XHybRemoval() cel_dir=fl.CELFileDir() + '/cel_files.txt' expression_file = fl.ExpFile() stats_file = fl.StatsFile() output_dir = fl.OutputDir() + '/APT-output' cache_dir = output_dir + '/apt-probeset-summarize-cache' architecture = fl.Architecture() ### May over-ride the real architecture if a failure occurs get_probe_level_results = 'yes' if get_probe_level_results == 'yes': export_features = 'yes' if xhyb_remove == 'yes' and (array_type == 'gene' or array_type == 'junction'): xhyb_remove = 'no' ### This is set when the user mistakenly selects exon array, initially if analyze_metaprobesets == 'yes': export_features = 'true' metaprobeset_file = filepath('AltDatabase/'+species+'/'+array_type+'/'+species+'_'+array_type+'_'+probeset_type+'.mps') count = verifyFileLength(metaprobeset_file) if count<2: import ExonArray ExonArray.exportMetaProbesets(array_type,species) ### Export metaprobesets for this build import subprocess; import platform print 'Processor architecture set =',architecture,platform.machine() if '/bin' in apt_dir: apt_file = apt_dir +'/apt-probeset-summarize' ### if the user selects an APT directory elif os.name == 'nt': if '32bit' in architecture: apt_file = apt_dir + '/PC/32bit/apt-probeset-summarize'; plat = 'Windows' elif '64bit' in architecture: apt_file = apt_dir + '/PC/64bit/apt-probeset-summarize'; plat = 'Windows' elif 'darwin' in sys.platform: apt_file = apt_dir + '/Mac/apt-probeset-summarize'; plat = 'MacOSX' elif 'linux' in sys.platform: if '32bit' in platform.architecture(): apt_file = apt_dir + '/Linux/32bit/apt-probeset-summarize'; plat = 'linux32bit' elif '64bit' in 
platform.architecture(): apt_file = apt_dir + '/Linux/64bit/apt-probeset-summarize'; plat = 'linux64bit' apt_file = filepath(apt_file) apt_extract_file = string.replace(apt_file,'probeset-summarize','cel-extract') #print 'AltAnalyze has choosen APT for',plat print "Beginning probeset summarization of input CEL files with Affymetrix Power Tools (APT)..." if 'cdf' in pgf_file or 'CDF' in pgf_file: if xhyb_remove == 'yes' and array_type == 'AltMouse': kill_list_dir = osfilepath('AltDatabase/'+species+'/AltMouse/'+species+'_probes_to_remove.txt') else: kill_list_dir = osfilepath('AltDatabase/affymetrix/APT/probes_to_remove.txt') try: ### Below code attempts to calculate probe-level summarys and absent/present p-values ### for 3'arrays (may fail for arrays with missing missmatch probes - AltMouse) cdf_file = pgf_file; algorithm = 'rma' retcode = subprocess.call([ apt_file, "-d", cdf_file, "--kill-list", kill_list_dir, "-a", algorithm, "-o", output_dir, "--cel-files", cel_dir, "-a", "pm-mm,mas5-detect.calls=1.pairs=1"]) try: extract_retcode = subprocess.call([ apt_extract_file, "-d", cdf_file, "--pm-with-mm-only", "-o", output_dir+'/probe.summary.txt', "--cel-files", cel_dir, "-a"]) ### "quant-norm,pm-gcbg", "--report-background" -requires a BGP file except Exception,e: #print traceback.format_exc() retcode = False ### On some system there is a no file found error, even when the analysis completes correctly if retcode: status = 'failed' else: status = 'run' summary_exp_file = output_dir+'/'+algorithm+'.summary.txt' export.customFileCopy(summary_exp_file, expression_file) ### Removes the # containing lines #shutil.copyfile(summary_exp_file, expression_file) os.remove(summary_exp_file) summary_stats_file = output_dir+'/pm-mm.mas5-detect.summary.txt' try: shutil.copyfile(summary_stats_file, stats_file) except Exception: None ### Occurs if dabg export failed os.remove(summary_stats_file) except Exception: #print traceback.format_exc() try: cdf_file = pgf_file; algorithm = 
'rma'; pval = 'dabg' retcode = subprocess.call([ apt_file, "-d", cdf_file, "--kill-list", kill_list_dir, "-a", algorithm, "-o", output_dir, "--cel-files", cel_dir]) # "-a", pval, if retcode: status = 'failed' else: status = 'run' summary_exp_file = output_dir+'/'+algorithm+'.summary.txt' export.customFileCopy(summary_exp_file, expression_file) ### Removes the # containing lines #shutil.copyfile(summary_exp_file, expression_file) os.remove(summary_exp_file) except NameError: status = 'failed'