def protocol(self):
    """Main entrypoint for the epitope mapping protocol.

    Merges the configured PACT datasets ([combinepact], required) and,
    when enabled in the workflow, imports PDB structural data
    ([pdb_import]).  Progress and the PACT preamble are appended to a
    timestamped log file in self.directory.
    """

    #Create a output log file that we can append to
    with open(self.directory + self.output_prefix + "_"
              + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S")
              + '_output.txt', 'w') as file_output:

        file_output.write(self.pact_preamble + "\n")

        """
        *****************************************
        Pact Combine Section (Required)
        *****************************************
        """

        #Check to see if the section is there
        if not self.obj_cfgparser.has_section('combinepact'):
            print("[Protocols:Epitope Mapping Error] The combinepact config file is incorrect.")
            print("[Protocols:Epitope Mapping Error] There is something wrong with the [combinepact] section.")
            quit()

        #Import our combinepact class
        try:
            from pact.analysis.combine_pact import combine_pact
        except ImportError:
            print("[Protocols:Epitope Mapping Error] combine_pact was not found.")
            #Fix: exit on a failed import, otherwise the combine_pact
            #call below raises a NameError
            quit()

        #Create the object then call the merger
        obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

        #The dict will be like {'dataset name': {data...
        dict_merged_datasets = obj_combine.combine_pact()

        """
        *****************************************
        PDB Import Section
        *****************************************
        """

        #Only import and run if selected
        if self.dict_workflow['pdb_import']:

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('pdb_import'):
                print("[Protocols:Epitope Mapping Error] The pdb_import config file is incorrect.")
                print("[Protocols:Epitope Mapping Error] There is something wrong with the [pdb_import] section.")
                quit()

            #Import our pdb_import class
            try:
                from pact.analysis.pdb_import import pdb_import
            except ImportError:
                print("[Protocols:Epitope Mapping Error] pdb_import was not found.")
                #Fix: exit on a failed import, otherwise the pdb_import
                #call below raises a NameError
                quit()

            #Create the object then call the importer
            obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs,
                                 {'directory':self.directory})

            #The dict will be like {'pdb name': {data...
            dict_pdb = obj_pdb.pdb_import()

    return
def protocol(self):
    """Provide a protocol that does general analyses that don't need a full protocol"""

    #Create a output log file that we can append to
    with open(self.directory + self.output_prefix + "_"
              + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S")
              + '_output.txt', 'w') as file_output:

        file_output.write(self.pact_preamble + "\n")

        """
        *****************************************
        Pact Combine (Required)
        *****************************************
        """

        #Check to see if the section is there
        if not self.obj_cfgparser.has_section('combinepact'):
            print("[Protocols:Analysis Error] The combinepact config file is incorrect.")
            print("[Protocols:Analysis Error] There is something wrong with the [combinepact] section.")
            quit()

        #Import our combinepact class
        try:
            from pact.analysis.combine_pact import combine_pact
        except ImportError:
            print("[Protocols:Analysis Error] combine_pact was not found.")
            #Fix: exit on a failed import, otherwise the combine_pact
            #call below raises a NameError
            quit()

        #Create the object then call the merger
        obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

        #Print Section Progress
        print("[Protocols:Analysis] Combine PACT")

        #The dict will be like {'dataset name': {data...
        dict_merged_datasets = obj_combine.combine_pact()

        """
        *****************************************
        PDB Import Section
        *****************************************
        """

        #Only import and run if selected
        if self.dict_workflow['pdb_import']:

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('pdb_import'):
                print("[Protocols:Analysis Error] The pdb_import config file is incorrect.")
                print("[Protocols:Analysis Error] There is something wrong with the [pdb_import] section.")
                quit()

            #Import our pdb_import class
            try:
                from pact.analysis.pdb_import import pdb_import
            except ImportError:
                print("[Protocols:Analysis Error] pdb_import was not found.")
                #Fix: exit on a failed import, otherwise the pdb_import
                #call below raises a NameError
                quit()

            #Create the object then call the importer
            obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs,
                                 {'directory':self.directory})

            #Print Section Progress
            print("[Protocols:Analysis] PDB Import")

            #The dict will be like {'pdb name': {data...
            dict_pdb = obj_pdb.pdb_import()

        """
        *****************************************
        Assign colors to classifiers
        *****************************************
        """

        #Only import and run if selected
        if self.dict_workflow['classifier_color']:

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('classifier_color'):
                print("[Protocols:Analysis Error] The classifier_color config file is incorrect.")
                print("[Protocols:Analysis Error] There is something wrong with the [classifier_color] section.")
                quit()

            #Import our classifier_color class
            try:
                from pact.analysis.classifier_color import classifier_color
            except ImportError:
                #Fix: corrected the "classifer_color" typo in the message
                #and exit so the classifier_color call below is defined
                print("[Protocols:Analysis Error] classifier_color was not found.")
                quit()

            #Create the object then call the merger
            obj_classcolor = classifier_color(self.obj_cfgparser, self.dict_programs, {})

            #Print Section Progress
            print("[Protocols:Analysis] Classifier Color")

            #This section returns a dict of [loc][mut] = "color"
            #NOTE(review): dict_custom_color is only assigned when the
            #classifier is "pdb", and dict_pdb only exists when the
            #pdb_import workflow ran — any other combination leaves a
            #NameError waiting in the Set vs Set section below; confirm
            #intended config combinations
            if self.obj_cfgparser.get("classifier_color", "classifier").split(',')[0] == "pdb":
                dict_custom_color = obj_classcolor.classifier_color(dict_merged_datasets,
                                                                    dict_pdb, "pdb")

        """
        *****************************************
        Set vs Set Section
        *****************************************
        """

        #Check to see if the section is there
        if not self.obj_cfgparser.has_section('setvsset'):
            print("[Protocols:Analysis Error] The setvsset config file is incorrect.")
            print("[Protocols:Analysis Error] There is something wrong with the [setvsset] section.")
            quit()

        #Import our setvsset class
        try:
            from pact.analysis.set_vs_set import set_vs_set
        except ImportError:
            print("[Protocols:Analysis Error] set_vs_set was not found.")
            #Fix: exit on a failed import, otherwise the set_vs_set
            #call below raises a NameError
            quit()

        #Create the object then call the merger
        obj_svs = set_vs_set(self.obj_cfgparser, self.dict_programs,
                             {'directory':self.directory})

        #Do we have structural data?
        if self.dict_workflow['classifier_color']:
            print("[Protocols:Analysis] Dataset vs Dataset")
            file_output.write(obj_svs.set_vs_set(dict_merged_datasets, dict_custom_color))
        else:
            print("[Protocols:Analysis] Dataset vs Dataset")
            file_output.write(obj_svs.set_vs_set(dict_merged_datasets))

    return
def protocol(self):
    """Main entrypoint for the homology protocol.

    Runs the workflow-selected subset of: BLASTP hit filtering and
    alignment (CD-HIT + MUSCLE), PSSM generation via PSIBlast, sitewise
    frequency calculation, stored-PSSM reading, dataset merging, and
    the site fitness vs homology analysis.  Progress is echoed to
    stdout and to a timestamped log file in self.directory.
    """

    #Create a output log file that we can append to
    with open(self.directory + self.output_prefix + "_"
              + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S")
              + '_output.txt', 'w') as file_output:

        file_output.write(self.pact_preamble + "\n")

        #Import our class
        try:
            from pact.analysis.sequence.homology_pssm import homology_classifier
        except ImportError:
            print("[Protocols:Homology Error] pact.analysis.sequence.homology_pssm was not found.")
            quit()

        #Create our object
        obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs,
                                           {'directory': self.directory})

        """
        *****************************************
        DNA Filtering/Alignment Section
        *****************************************
        """
        if self.dict_workflow['blastp_align_filter']:

            #Convert our XML file
            print("[Protocols:Homology] xml_to_fasta")
            file_output.write("[Protocols:Homology] xml_to_fasta\n")
            obj_homology.xml_to_fasta()

            #Run CD-HIT on our new fasta file
            print("[Protocols:Homology] cdhit")
            file_output.write("[Protocols:Homology] cdhit\n")

            #Check to see if the number of processes is logical
            self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes")
            if int(self.processes) <= 0:
                self.processes = "2"

            check_output([self.dict_programs['cdhit'],
                          "-i", self.directory + self.output_prefix + ".fa",
                          "-o", self.directory + self.output_prefix + ".afa",
                          "-c", str(self.obj_cfgparser.get("blastp_align_filter",
                                                           "cdhit_clustering_threshold")),
                          "-M", "40000",
                          "-T", str(self.processes)])

            #Check to see if we have WT in our cdhit output
            print("[Protocols:Homology] cdhit_wtcheck")
            file_output.write("[Protocols:Homology] cdhit_wtcheck\n")
            obj_homology.cdhit_wt_check()

            #Run MUSCLE on our new fasta file
            print("[Protocols:Homology] muscle")
            file_output.write("[Protocols:Homology] muscle\n")
            check_output([self.dict_programs['muscle'],
                          "-in", self.directory + self.output_prefix + ".afa",
                          "-out", self.directory + self.output_prefix + ".msa"])

            #Process our MSA (needs to be on for PSIBlast)
            print("[Protocols:Homology] processmsa")
            file_output.write("[Protocols:Homology] processmsa\n")
            list_msa = obj_homology.process_msa()

            #Save our list
            print("[Protocols:Homology] Saving our MSA")
            file_output.write("[Protocols:Homology] Saving our MSA\n")
            save_pact_file(list_msa, self.directory + self.output_prefix + '_' + "list_msa")

        """
        *****************************************
        PSSM Section
        *****************************************
        """
        if self.dict_workflow['pssm']:

            #Open our list
            print("[Protocols:Homology] Opening our MSA")
            file_output.write("[Protocols:Homology] Opening our MSA\n")
            list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa")

            #Split our msa for PSIBlast (needs to be on for PSIBlast)
            print("[Protocols:Homology] msa_split")
            file_output.write("[Protocols:Homology] msa_split\n")
            list_pbcmds = obj_homology.msa_split(list_msa)

            #Run PSIBlast
            print("[Protocols:Homology] psiblast")
            file_output.write("[Protocols:Homology] psiblast\n")
            for command in list_pbcmds:
                check_output([self.dict_programs['psiblast'], *command])

            #Import our PSSM data
            print("[Protocols:Homology] pssm_file_import")
            file_output.write("[Protocols:Homology] pssm_file_import\n")
            dict_pssm = obj_homology.pssm_file_import()

            #Save our heatmap
            print("[Protocols:Homology] Saving a PSSM .csv heatmap")
            file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n")

            #Save our csv
            print("[Protocols:Homology] Saving a PSSM .csv column data")
            file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n")

            #Save our PACT File
            print("[Protocols:Homology] Saving a PSSM .pact file")
            file_output.write(save_pact_file(dict_pssm,
                                             self.directory + self.output_prefix
                                             + '_' + "PSSM") + "\n")

        """
        *****************************************
        Sitewise Frequency Section
        *****************************************
        """
        if self.dict_workflow['site_frequencies']:

            #Open our list
            print("[Protocols:Homology] Opening our MSA")
            file_output.write("[Protocols:Homology] Opening our MSA\n")
            list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa")

            #Calculate our frequencies
            print("[Protocols:Homology] Calculate our frequencies")
            file_output.write("[Protocols:Homology] Calculate our frequencies\n")
            dict_freq = obj_homology.msa_freq(list_msa)

            #Save our CSV heatmap
            print("[Protocols:Homology] Saving the frequencies heatmap")
            file_output.write("[Protocols:Homology] Saving the frequencies heatmap\n")
            obj_homology.freq_output_heat(dict_freq)

            #Save our PACT File
            print("[Protocols:Homology] Saving a Freq .pact file")
            file_output.write(save_pact_file(dict_freq,
                                             self.directory + self.output_prefix
                                             + '_' + "freq") + "\n")

        """
        *****************************************
        Read stored .pact files
        *****************************************
        """
        if self.dict_workflow['pssm_reader']:

            #Open our PACT File
            #Fix: this section referenced str_protocol_name, which is
            #not used anywhere else in this method (copied from another
            #protocol); use the literal "Homology" tag for consistency
            print("[Protocols:Homology] Opening a PSSM .pact file")
            dict_pssm = open_pact_file(self.directory + self.output_prefix + '_' + "PSSM")

            #Count our classifiers
            print("[Protocols:Homology] PSSM Classifier Count")
            file_output.write("[Protocols:Homology] PSSM Classifier Count")

        """
        *****************************************
        Pact Combine Section
        *****************************************
        """
        if self.dict_workflow['combinepact']:

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print("[Protocols:Homology Error] The combinepact config file is incorrect.")
                print("[Protocols:Homology Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:Homology Error] combine_pact was not found.")
                #Fix: exit on a failed import, otherwise the combine_pact
                #call below raises a NameError
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

        """
        *****************************************
        Analysis Section
        *****************************************
        """
        #NOTE(review): this section relies on dict_merged_datasets from
        #the combinepact workflow above — confirm both switches are
        #enabled together in the config
        if self.dict_workflow['analysis_sitefitness_homology']:

            #Which dataset do we want?
            if self.obj_cfgparser.get('analysis_sitefitness_homology',
                                      'dataset_x') == "site_frequencies":
                file_name = "freq"
                pact_suffix = "freq"
            else:
                file_name = "pssm"
                #Fix: the PSSM pact file is saved above with the uppercase
                #suffix "PSSM"; opening "..._pssm" fails on case-sensitive
                #filesystems.  file_name keeps its original value for the
                #analysis calls below.
                pact_suffix = "PSSM"

            #Open our dict
            print("[Protocols:Homology] Opening our homology data")
            file_output.write("[Protocols:Homology] Opening our site freqs\n")
            dict_homology = open_pact_file(self.directory + self.output_prefix
                                           + '_' + pact_suffix)

            #Plot our data
            if self.obj_cfgparser.get('analysis_sitefitness_homology', 'scatter') == "True":
                print("[Protocols:Homology] Plotting the figure")
                file_output.write("[Protocols:Homology] Plotting the figure\n")
                obj_homology.analysis_site_fit_homology_plot(dict_homology,
                                                             dict_merged_datasets,
                                                             file_name)

            #Make our classifier table
            print("[Protocols:Homology] Making our classifier table")
            file_output.write("[Protocols:Homology] Making our classifier table\n")
            obj_homology.analysis_site_fit_homology_classifier(dict_homology,
                                                               dict_merged_datasets,
                                                               file_name)

    return
def protocol(self):
    """Main entrypoint for the protocol.

    Merges the configured PACT datasets, classifies every mutation as
    DEL/NEU/BEN/UNCLASSIFIED from the [variant_classification] config,
    then runs the workflow-selected analyses: basal classifier counts,
    BLASTP filtering/alignment, PSSM generation and reading, PDB
    import, and back-to-consensus analyses.
    """

    #Create a output log file that we can append to
    with open(self.directory + self.output_prefix + "_"
              + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S")
              + '_output.txt', 'w') as file_output:

        file_output.write(self.pact_preamble + "\n")

        """
        *****************************************
        Pact Combine (Required)
        *****************************************
        """

        #Check to see if the section is there
        if not self.obj_cfgparser.has_section('combinepact'):
            print("[Protocols:" + str_protocol_name + " Error] The combinepact config file is incorrect.")
            print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [combinepact] section.")
            quit()

        #Import our combinepact class
        try:
            from pact.analysis.combine_pact import combine_pact
        except ImportError:
            print("[Protocols:" + str_protocol_name + " Error] combine_pact was not found.")
            #Fix: exit on a failed import, otherwise the combine_pact
            #call below raises a NameError
            quit()

        #Create the object then call the merger
        obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

        #The dict will be like {'dataset name': {data...
        dict_merged_datasets = obj_combine.combine_pact()

        """
        *****************************************
        Classify our mutations
        *****************************************
        """
        #Build a dict_classified with [location][mutation] = "DEL/NEU/BEN/UNCLASSIFIED"

        #Get the config file elements
        try:
            class_column = self.obj_cfgparser.get("variant_classification",
                                                  "class_column").lower()
            class_threshold = float(self.obj_cfgparser.get("variant_classification",
                                                           "class_threshold"))
        except NoOptionError:
            print("[Protocols:" + str_protocol_name + " Error] Missing [variant_classification] config file elements.")
            quit()
        #ValueError and TypeError had identical handlers; merged
        except (ValueError, TypeError):
            print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.")
            quit()

        #Make a dict to add our classifications into
        dict_classified = {}

        #Classify each dataset
        for dataset in dict_merged_datasets:

            #Add if not existing
            if dataset not in dict_classified:
                dict_classified[dataset] = {}

            #Loop the locations
            for loc in dict_merged_datasets[dataset]:

                #Add a new location if not in the dict
                if loc not in dict_classified[dataset]:
                    dict_classified[dataset][loc] = {}

                #Loop the muts
                for mut in dict_merged_datasets[dataset][loc]:

                    #Skip WT, stop, and NaN
                    if (mut == dict_merged_datasets[dataset][loc][mut]['wt_residue']
                            or mut == "*"
                            or dict_merged_datasets[dataset][loc][mut][class_column] == "NaN"):
                        dict_classified[dataset][loc][mut] = "UNCLASSIFIED"
                        continue

                    #Get the fitness value from the dataset
                    mut_value = float(dict_merged_datasets[dataset][loc][mut][class_column])

                    #Assign a classification of deleterious, neutral, or beneficial
                    if mut_value <= (-1 * class_threshold):
                        dict_classified[dataset][loc][mut] = "DEL"
                    elif (mut_value > (-1 * class_threshold)
                          and mut_value < class_threshold):
                        dict_classified[dataset][loc][mut] = "NEU"
                    elif mut_value >= class_threshold:
                        dict_classified[dataset][loc][mut] = "BEN"

        """
        *****************************************
        Count the basal classifiers
        *****************************************
        """
        if self.dict_workflow['basal_count']:

            #Import our class
            try:
                from pact.analysis.basal_count import basal_count
            except ImportError:
                print("[Protocols:" + str_protocol_name + " Error] pact.analysis.basal_count was not found.")
                quit()

            #Create our object
            obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {})

            #Count our basal rates
            for dataset in dict_classified:
                print("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)
                file_output.write("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)
                file_output.write(obj_basal.basal_count(dict_classified[dataset]) + "\n")

        """
        *****************************************
        DNA Filtering/Alignment or PSSM Object
        *****************************************
        """
        #Fix: wt_consensus added to the gate — the reader section below
        #runs on pssm_reader OR wt_consensus and uses obj_homology, which
        #was undefined when only wt_consensus was enabled
        if (self.dict_workflow['blastp_align_filter']
                or self.dict_workflow['pssm']
                or self.dict_workflow['pssm_reader']
                or self.dict_workflow['wt_consensus']):

            #Import Check Output
            from subprocess import check_output

            #Import our class
            try:
                from pact.analysis.sequence.homology_pssm import homology_classifier
            except ImportError:
                print("[Protocols:" + str_protocol_name + " Error] pact.analysis.sequence.homology_pssm was not found.")
                quit()

            #Create our object
            obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs,
                                               {'directory':self.directory})

            #Count our classifiers
            print("[Protocols:" + str_protocol_name + "] Homology PSSM")
            file_output.write("[Protocols:" + str_protocol_name + "] Homology PSSM")

            """
            *****************************************
            DNA Filtering/Alignment
            *****************************************
            """
            if self.dict_workflow['blastp_align_filter']:

                #Convert our XML file
                print("[Protocols:" + str_protocol_name + "] xml_to_fasta")
                file_output.write("[Protocols:" + str_protocol_name + "] xml_to_fasta\n")
                obj_homology.xml_to_fasta()

                #Run CD-HIT on our new fasta file
                print("[Protocols:" + str_protocol_name + "] cdhit")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit\n")

                #Check to see if the number of processes is logical
                self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes")
                if int(self.processes) <= 0:
                    self.processes = "2"

                check_output([self.dict_programs['cdhit'],
                              "-i", self.directory + self.output_prefix + ".fa",
                              "-o", self.directory + self.output_prefix + ".afa",
                              "-c", str(self.obj_cfgparser.get("blastp_align_filter",
                                                               "cdhit_clustering_threshold")),
                              "-M", "40000",
                              "-T", str(self.processes)])

                #Check to see if we have WT in our cdhit output
                print("[Protocols:" + str_protocol_name + "] cdhit_wtcheck")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit_wtcheck\n")
                obj_homology.cdhit_wt_check()

                #Run MUSCLE on our new fasta file
                print("[Protocols:" + str_protocol_name + "] muscle")
                file_output.write("[Protocols:" + str_protocol_name + "] muscle\n")
                check_output([self.dict_programs['muscle'],
                              "-in", self.directory + self.output_prefix + ".afa",
                              "-out", self.directory + self.output_prefix + ".msa"])

                #Process our MSA (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] processmsa")
                file_output.write("[Protocols:" + str_protocol_name + "] processmsa\n")
                list_msa = obj_homology.process_msa()

                #Save our list
                print("[Protocols:" + str_protocol_name + "] Saving our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Saving our MSA\n")
                save_pact_file(list_msa, self.directory + self.output_prefix + '_' + "list_msa")

            """
            *****************************************
            PSSM
            *****************************************
            """
            if self.dict_workflow['pssm']:

                #Open our list
                print("[Protocols:" + str_protocol_name + "] Opening our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Opening our MSA\n")
                list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa")

                #Split our msa for PSIBlast (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] msa_split")
                file_output.write("[Protocols:" + str_protocol_name + "] msa_split\n")
                list_pbcmds = obj_homology.msa_split(list_msa)

                #Run PSIBlast
                print("[Protocols:" + str_protocol_name + "] psiblast")
                file_output.write("[Protocols:" + str_protocol_name + "] psiblast\n")
                for command in list_pbcmds:
                    check_output([self.dict_programs['psiblast'], *command])

                #Import our PSSM data
                print("[Protocols:" + str_protocol_name + "] pssm_file_import")
                file_output.write("[Protocols:" + str_protocol_name + "] pssm_file_import\n")
                dict_pssm = obj_homology.pssm_file_import()

                #Save our heatmap
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv heatmap")
                file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n")

                #Save our csv
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv column data")
                file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n")

                #Save our PACT File
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .pact file")
                file_output.write(save_pact_file(dict_pssm,
                                                 self.directory + self.output_prefix
                                                 + '_' + "PSSM") + "\n")

            """
            *****************************************
            Read stored PSSM files
            *****************************************
            """
            if self.dict_workflow['pssm_reader'] or self.dict_workflow['wt_consensus']:

                #Open our PACT File
                print("[Protocols:" + str_protocol_name + "] Opening a PSSM .pact file")
                dict_pssm = open_pact_file(self.directory + self.output_prefix + '_' + "PSSM")

                #Count our classifiers
                print("[Protocols:" + str_protocol_name + "] PSSM Classifier Count")
                file_output.write("[Protocols:" + str_protocol_name + "] PSSM Classifier Count")

                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] PSSM Fitness Rates for dataset: " + dataset)
                    file_output.write("[Protocols:" + str_protocol_name + "] PSSM Fitness Rates for dataset: " + dataset)
                    file_output.write(obj_homology.classified_count_pssm(
                        dict_pssm, dict_classified[dataset]) + "\n")

                    print("[Protocols:" + str_protocol_name + "] Wrote CSV of fitness values categorized by PSSM group and mutation type for dataset: " + dataset)
                    #Fix: the log line previously repeated the "PSSM
                    #Fitness Rates" message instead of matching the CSV
                    #message printed above
                    file_output.write("[Protocols:" + str_protocol_name + "] Wrote CSV of fitness values categorized by PSSM group and mutation type for dataset: " + dataset)
                    file_output.write(obj_homology.classified_count_pssm_csv(
                        dict_pssm, dict_classified[dataset], dict_merged_datasets,
                        dataset, class_column) + "\n")

        """
        *****************************************
        PDB Import Section
        *****************************************
        """

        #Only import and run if selected
        if self.dict_workflow['pdb_import']:

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('pdb_import'):
                print("[Protocols:" + str_protocol_name + " Error] The pdb_import config file is incorrect.")
                print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [pdb_import] section.")
                quit()

            #Import our pdb_import class
            try:
                from pact.analysis.pdb_import import pdb_import
            except ImportError:
                print("[Protocols:" + str_protocol_name + " Error] pdb_import was not found.")
                #Fix: exit on a failed import, otherwise the pdb_import
                #call below raises a NameError
                quit()

            #Create the object then call the importer
            obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs,
                                 {'directory':self.directory})

            #The dict will be like {'pdb name': {data...
            dict_pdb = obj_pdb.pdb_import()

        """
        *****************************************
        Back to Consensus Analyses
        *****************************************
        """
        if self.dict_workflow['consensus']:

            #Import our class
            try:
                from pact.analysis.sequence.consensus import consensus
            except ImportError:
                #Fix: the message previously named pact.analysis.basal_count
                print("[Protocols:" + str_protocol_name + "] pact.analysis.sequence.consensus was not found.")
                quit()

            #Create our object
            obj_consensus = consensus(self.obj_cfgparser, self.dict_programs,
                                      {'directory':self.directory})

            #Get the wild-type sequence information
            #NOTE(review): dict_pssm comes from the PSSM sections above —
            #the pssm or pssm_reader/wt_consensus workflow must also be
            #enabled; confirm the config enforces this
            dict_wtcons = obj_consensus.wt_consensus(dict_pssm)

            #Get the prob of finding a classified mutation
            for dataset in dict_classified:
                print("[Protocols:" + str_protocol_name + "] Mut Classification vs WT Cons for dataset: " + dataset)
                obj_consensus.wtcons_count_class(dict_wtcons, dict_classified[dataset], dataset)

            #Get the prob of finding a classified mutation
            for dataset in dict_classified:
                print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a conserved site: " + dataset)
                file_output.write(obj_consensus.nonconserved_sites(
                    dict_wtcons, dict_pssm, dict_classified[dataset], dataset))

            #Get the prob of finding a classified mutation
            for dataset in dict_classified:
                print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a non-conserved mutation: " + dataset)
                file_output.write(obj_consensus.nonconserved_mutations(
                    dict_wtcons, dict_pssm, dict_classified[dataset], dataset))

            #Get the cross set distribution
            obj_consensus.cons_count_setvset(dict_wtcons, dict_pssm, dict_classified)

            #Burial-resolved variants need the structural data
            if self.dict_workflow['pdb_import']:

                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a conserved site (Buried Residues Only): " + dataset)
                    file_output.write(obj_consensus.nonconserved_sites_burial(
                        dict_wtcons, dict_pssm, dict_classified[dataset],
                        dataset, dict_pdb, "<"))

                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a conserved site (Surface Residues Only): " + dataset)
                    file_output.write(obj_consensus.nonconserved_sites_burial(
                        dict_wtcons, dict_pssm, dict_classified[dataset],
                        dataset, dict_pdb, ">="))

    return
def protocol(self):
    """Main entrypoint for the Shannon entropy protocol.

    Merges the configured PACT datasets ([combinepact], required) and
    runs the Shannon entropy classifier over them ([shannon_entropy],
    required).  A timestamped log file is written to self.directory.
    """

    #Create a output log file that we can append to
    with open(self.directory + self.output_prefix + "_"
              + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S")
              + '_output.txt', 'w') as file_output:

        file_output.write(self.pact_preamble + "\n")

        """
        *****************************************
        Pact Combine (Required)
        *****************************************
        """

        #Check to see if the section is there
        if not self.obj_cfgparser.has_section('combinepact'):
            print("[Protocols:Shannon Entropy Error] The combinepact config file is incorrect.")
            print("[Protocols:Shannon Entropy Error] There is something wrong with the [combinepact] section.")
            quit()

        #Import our combinepact class
        try:
            from pact.analysis.combine_pact import combine_pact
        except ImportError:
            print("[Protocols:Shannon Entropy Error] combine_pact was not found.")
            #Fix: exit on a failed import, otherwise the combine_pact
            #call below raises a NameError
            quit()

        #Create the object then call the merger
        obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

        #The dict will be like {'dataset name': {data...
        dict_merged_datasets = obj_combine.combine_pact()

        """
        *****************************************
        Shannon Class Import
        *****************************************
        """

        #Check to see if the section is there
        #Fix: the error messages previously referenced [combinepact]
        #(copy-paste from the section above) even though this check is
        #for [shannon_entropy]
        if not self.obj_cfgparser.has_section('shannon_entropy'):
            print("[Protocols:Shannon Entropy Error] The shannon_entropy config file is incorrect.")
            print("[Protocols:Shannon Entropy Error] There is something wrong with the [shannon_entropy] section.")
            quit()

        #Import our SE class
        try:
            from pact.analysis.sequence.shannon_entropy import shannon_entropy_classifier
        except ImportError:
            print("[Protocols:Shannon Entropy Error] Cannot load pact.analysis.sequence.shannon_entropy")
            quit()

        #Create our object
        obj_se = shannon_entropy_classifier(self.obj_cfgparser, self.dict_programs,
                                            {'directory': self.directory})

        #Run our object
        dict_entropy = obj_se.shannon_entropy(dict_merged_datasets)

    return
def protocol(self): """Main entrypoint for the protocol""" #Create a output log file that we can append to with open(self.directory + self.output_prefix + "_" + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S") + '_output.txt', 'w') as file_output: file_output.write(self.pact_preamble + "\n") """ ***************************************** Pact Combine ***************************************** """ if self.dict_workflow['combinepact']: #Check to see if the section is there if not self.obj_cfgparser.has_section('combinepact'): print("[Protocols:" + str_protocol_name + " Error] The combinepact config file is incorrect.") print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [combinepact] section.") quit() #Import our combinepact class try: from pact.analysis.combine_pact import combine_pact except ImportError: print("[Protocols:" + str_protocol_name + " Error] combine_pact was not found.") #Create the object then call the merger obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {}) #The dict will be like {'dataset name': {data... 
dict_merged_datasets = obj_combine.combine_pact() """ ***************************************** Classify our mutations ***************************************** """ #Build a dict_classified with [location][mutation] = "DEL/NEU/BEN/NONE" #Get the config file elements try: class_column = self.obj_cfgparser.get("variant_classification", "class_column").lower() class_threshold = float(self.obj_cfgparser.get("variant_classification", "class_threshold")) except NoOptionError: print("[Protocols:" + str_protocol_name + " Error] Missing [variant_classification] config file elements.") quit() except ValueError: print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.") quit() except TypeError: print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.") quit() #Make a dict to add our classifications into dict_classified = {} #Classify each dataset for dataset in dict_merged_datasets: #Add if not existing if dataset not in dict_classified: dict_classified[dataset] = {} #Loop the locations for loc in dict_merged_datasets[dataset]: #Add a new location if not in the dict if loc not in dict_classified[dataset]: dict_classified[dataset][loc] = {} #Loop the muts for mut in dict_merged_datasets[dataset][loc]: #Skip WT, stop, and NaN if (mut == dict_merged_datasets[dataset][loc][mut]['wt_residue'] or mut == "*" or dict_merged_datasets[dataset][loc][mut][class_column] == "NaN"): dict_classified[dataset][loc][mut] = "UNCLASSIFIED" continue #Get the fitness value from the dataset mut_value = float(dict_merged_datasets[dataset][loc][mut][class_column]) #Assign a classification of deleterious, slightly deleterious, or neutral if mut_value <= (-1 * class_threshold): dict_classified[dataset][loc][mut] = "DEL" elif (mut_value > (-1 * class_threshold) and mut_value < class_threshold): dict_classified[dataset][loc][mut] = "NEU" elif mut_value >= class_threshold: 
dict_classified[dataset][loc][mut] = "BEN" #if mut_value < -1: # dict_classified[dataset][loc][mut] = "DEL" #elif mut_value >= -1 and mut_value < -0.3: # dict_classified[dataset][loc][mut] = "NEU" #elif mut_value >= -0.3: # dict_classified[dataset][loc][mut] = "BEN" """ ***************************************** Count the basal classifiers ***************************************** """ if self.dict_workflow['basal_count']: #Import our class try: from pact.analysis.basal_count import basal_count except ImportError: print("[Protocols:" + str_protocol_name + " Error] pact.analysis.basal_count was not found.") quit() #Create our object obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {}) #Count our basal rates for dataset in dict_classified: print("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset) file_output.write("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset) file_output.write(obj_basal.basal_count(dict_classified[dataset]) + "\n") """ ***************************************** DNA Filtering/Alignment or PSSM Object ***************************************** """ if (self.dict_workflow['blastp_align_filter'] or self.dict_workflow['pssm'] or self.dict_workflow['pssm_reader']): #Import Check Output from subprocess import check_output #Import our class try: from pact.analysis.sequence.homology_pssm import homology_classifier except ImportError: print("[Protocols:" + str_protocol_name + " Error] pact.analysis.sequence.homology_pssm was not found.") quit() #Create our object obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #Count our classifiers print("[Protocols:" + str_protocol_name + "] Homology PSSM") file_output.write("[Protocols:" + str_protocol_name + "] Homology PSSM") """ ***************************************** DNA Filtering/Alignment ***************************************** """ if 
self.dict_workflow['blastp_align_filter']: #Convert our XML file print("[Protocols:" + str_protocol_name + "] xml_to_fasta") file_output.write("[Protocols:" + str_protocol_name + "] xml_to_fasta\n") obj_homology.xml_to_fasta() #Run CD-HIT on our new fasta file print("[Protocols:" + str_protocol_name + "] cdhit") file_output.write("[Protocols:" + str_protocol_name + "] cdhit\n") #Check to see if the number of processes is logical self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes") if int(self.processes) <= 0: self.processes = "2" check_output([self.dict_programs['cdhit'], "-i", self.directory + self.output_prefix + ".fa", "-o", self.directory + self.output_prefix + ".afa", "-c", str(self.obj_cfgparser.get("blastp_align_filter", "cdhit_clustering_threshold")), "-M", "40000", "-T", str(self.processes)]) #Check to see if we have WT in our cdhit output print("[Protocols:" + str_protocol_name + "] cdhit_wtcheck") file_output.write("[Protocols:" + str_protocol_name + "] cdhit_wtcheck\n") obj_homology.cdhit_wt_check() #Run MUSCLE on our new fasta file print("[Protocols:" + str_protocol_name + "] muscle") file_output.write("[Protocols:" + str_protocol_name + "] muscle\n") check_output([self.dict_programs['muscle'], "-in", self.directory + self.output_prefix + ".afa", "-out", self.directory + self.output_prefix + ".msa"]) #Process our MSA (needs to be on for PSIBlast) print("[Protocols:" + str_protocol_name + "] processmsa") file_output.write("[Protocols:" + str_protocol_name + "] processmsa\n") list_msa = obj_homology.process_msa() #Save our list print("[Protocols:" + str_protocol_name + "] Saving our MSA") file_output.write("[Protocols:" + str_protocol_name + "] Saving our MSA\n") save_pact_file(list_msa, self.directory + self.output_prefix + '_' + "list_msa") """ ***************************************** PSSM ***************************************** """ if self.dict_workflow['pssm']: #Open our list print("[Protocols:" + str_protocol_name + "] 
Opening our MSA") file_output.write("[Protocols:" + str_protocol_name + "] Opening our MSA\n") list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa") #Split our msa for PSIBlast (needs to be on for PSIBlast) print("[Protocols:" + str_protocol_name + "] msa_split") file_output.write("[Protocols:" + str_protocol_name + "] msa_split\n") list_pbcmds = obj_homology.msa_split(list_msa) #Run PSIBlast print("[Protocols:" + str_protocol_name + "] psiblast") file_output.write("[Protocols:" + str_protocol_name + "] psiblast\n") for command in list_pbcmds: check_output([self.dict_programs['psiblast'], *command]) #Import our PSSM data print("[Protocols:" + str_protocol_name + "] pssm_file_import") file_output.write("[Protocols:" + str_protocol_name + "] pssm_file_import\n") dict_pssm = obj_homology.pssm_file_import() #Save our heatmap print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv heatmap") file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n") #Save our csv print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv column data") file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n") #Save our PACT File print("[Protocols:" + str_protocol_name + "] Saving a PSSM .pact file") file_output.write(save_pact_file(dict_pssm, self.directory + self.output_prefix + '_' + "PSSM") + "\n") """ ***************************************** Read stored PSSM files ***************************************** """ if (self.dict_workflow['pssm_reader'] or self.dict_workflow['consensus']): #Open our PACT File print("[Protocols:" + str_protocol_name + "] Opening a PSSM .pact file") dict_pssm = open_pact_file(self.directory + self.output_prefix + '_' + "PSSM") """ ***************************************** PDB Import Section ***************************************** """ #Only import and run if selected if self.dict_workflow['pdb_import']: #Check to see if the section is there if not self.obj_cfgparser.has_section('pdb_import'): 
print("[Protocols:" + str_protocol_name + " Error] The pdb_import config file is incorrect.") print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [pdb_import] section.") quit() #Import our class try: from pact.analysis.pdb_import import pdb_import except ImportError: print("[Protocols:" + str_protocol_name + " Error] pdb_import was not found.") #Create the object then call the merger obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #The dict will be like {'pdb name': {data... dict_pdb = obj_pdb.pdb_import() """ ***************************************** Back to Consensus Analyses ***************************************** """ if self.dict_workflow['consensus']: #Import our class try: from pact.analysis.sequence.consensus import consensus except ImportError: print("[Protocols:" + str_protocol_name + "] pact.analysis.basal_count was not found.") quit() #Create our object obj_consensus = consensus(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #Get the wild-type sequence information dict_wtcons = obj_consensus.wt_consensus(dict_pssm) """ ***************************************** Residue Chemical/Size ***************************************** """ if self.dict_workflow['residue_chemical_size']: #Import our residue_chemical_size class try: from pact.analysis.sequence.residue_chemical_size import residue_chemical_size except ImportError: print("[Protocols:Enzyme Solubility Error] residue_chemical_size was not found.") #Create the object then call the merger obj_rcs = residue_chemical_size(self.obj_cfgparser, self.dict_programs, {}) """ ***************************************** Distance to Active Site ***************************************** """ #Only import and run if selected if self.dict_workflow['distance_to_active']: #Check to see if the section is there if not self.obj_cfgparser.has_section('distance_to_active'): print("[Protocols:Enzyme Solubility Error] The 
distance_to_active config file is incorrect.") print("[Protocols:Enzyme Solubility Error] There is something wrong with the [distance_to_active] section.") quit() #Import our class try: from pact.analysis.structure.dist_to_active import dist_to_active except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.structure.dist_to_active was not found.") quit() #Create our object obj_dtoa = dist_to_active(self.obj_cfgparser, self.dict_programs, {}) #Calculate the distance dict_dtoa_dist = obj_dtoa.dta_dist(dict_pdb) """ ***************************************** Contact Number ***************************************** """ #Only import and run if selected if self.dict_workflow['contact_number']: #Check to see if the section is there if not self.obj_cfgparser.has_section('contact_number'): print("[Protocols:Enzyme Solubility Error] The contact_number config file is incorrect.") print("[Protocols:Enzyme Solubility Error] There is something wrong with the [contact_number] section.") quit() #Import our class try: from pact.analysis.structure.contact_number import contact_number except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.structure.contact_number was not found.") quit() #Create our object obj_contact = contact_number(self.obj_cfgparser, self.dict_programs, {}) #Calculate the distance dict_contact = obj_contact.contact_number(dict_pdb) """ ***************************************** Output CSV and .pact ***************************************** """ quit() aa_table = 'ACDEFGHIKLMNPQRSTVWY' wtaa = self.obj_cfgparser.get('global', 'wtaa').upper() if self.dict_workflow['pdb_import']: chain = self.obj_cfgparser.get('classification_analysis', 'chain').upper() pdb_file = self.obj_cfgparser.get('classification_analysis', 'pdb_file') list_pdb_sites = sorted([x for x in dict_pdb[pdb_file]['dssp'][chain]]) #Make a dict to work into dict_output = {} #Get the dataset name in order if self.obj_cfgparser.has_section("combinepact"): num_datasets = 
int(self.obj_cfgparser.get('combinepact', 'numdatasets')) list_datasets = [self.obj_cfgparser.get('combinepact', 'dataset_' + str(int_dataset)) for int_dataset in range(1, num_datasets + 1)] else: list_datasets = [] #Get the header str_output = ','.join([ "Location", "Mutation", ','.join([dataset + "_fitness" for dataset in list_datasets]), ','.join([dataset + "_sd_from_wt" for dataset in list_datasets]), ','.join([dataset + "_classified" for dataset in list_datasets]), "wt_resi", "wt_pssm", "wt_percent", "max_pssm", "max_percent", "pssm_cons_count", "percent_cons_count", "wt_max_pssm", "wt_max_percent", "mut_pssm", "mut_percent", "frac_burial", "polarity", "aromatics", "philic_phobic", "size", "hydropathy", "dist_to_active", "contact_number" ]) + "\n" #Loop the locations for loc in range(1, len(wtaa) + 1): #Add to dict if not already added if loc not in dict_output: dict_output[loc] = {} #Loop the mutations for mut in aa_table: #Add to dict if not already added if mut not in dict_output[loc]: dict_output[loc][mut] = {} #Get the location str_output = str_output + str(loc) + ',' #Get the mutation str_output = str_output + mut + ',' #Get the datasets, test if loc is in there str_output = str_output + ','.join([str(dict_merged_datasets[dataset][loc][mut]['fitness']) if loc in dict_merged_datasets[dataset] else " " for dataset in list_datasets ]) + ',' str_output = str_output + ','.join([str(dict_merged_datasets[dataset][loc][mut]['sd_from_wt']) if loc in dict_merged_datasets[dataset] else " " for dataset in list_datasets ]) + ',' #Get the classified str_output = str_output + ','.join([str(dict_classified[dataset][loc][mut]) if loc in dict_classified[dataset] else " " for dataset in list_datasets ]) + ',' #Get the WT Consensus Data if self.dict_workflow['consensus']: str_output = str_output + ','.join(map(str, [ dict_wtcons[loc]['wt_resi'], dict_wtcons[loc]['wt_pssm'], dict_wtcons[loc]['wt_percent'], dict_wtcons[loc]['max_pssm'], dict_wtcons[loc]['max_percent'], 
dict_wtcons[loc]['pssm_cons_count'], dict_wtcons[loc]['percent_cons_count'], dict_wtcons[loc]['wt_max_pssm'], dict_wtcons[loc]['wt_max_percent'], ])) + ',' else: str_output = str_output + "NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN," #Get the PSSM data if self.dict_workflow['pssm_reader']: str_output = str_output + dict_pssm[loc][mut][0] + "," str_output = str_output + dict_pssm[loc][mut][1] + "," else: str_output = str_output + "NaN,NaN," #Get the fraction burial if self.dict_workflow['pdb_import']: if loc in list_pdb_sites: str_output = str_output + str(dict_pdb[pdb_file]['dssp'][chain][loc]['frac_burial']) + "," else: str_output = str_output + "NaN," else: str_output = str_output + "NaN," #To/From Proline if self.dict_workflow['residue_chemical_size']: dict_rcs_mut = obj_rcs.mut_info(wtaa[loc - 1], mut) str_output = str_output + ','.join([dict_rcs_mut['polarity'], dict_rcs_mut['aromatics'], dict_rcs_mut['philic_phobic'], dict_rcs_mut['size'], str(dict_rcs_mut['hydropathy']), ]) + ',' else: str_output = str_output + "NaN,NaN,NaN,NaN,NaN," #Dist to active site if self.dict_workflow['distance_to_active']: str_output = str_output + str(min(dict_dtoa_dist[chain][loc])) + ',' else: str_output = str_output + "NaN," #Contact number if self.dict_workflow['contact_number']: str_output = str_output + str(len(dict_contact[chain][loc])) + ',' else: str_output = str_output + "NaN," #Newline str_output = str_output + '\n' #Output a csv file with open(self.directory + self.output_prefix + '_dataset.csv', 'w') as file_output: file_output.write(str_output) #At this point it's easier to backcalculate the csv file list_output = str_output.splitlines() list_keys = list_output[0].rstrip('\n').split(',') columns = len(list_keys) #Parse the lines for line in list_output[1:]: #Split the line splitline = line.split(',') #Parse each column for i in range(2, columns): dict_output[int(splitline[0])][splitline[1]][list_keys[i]] = splitline[i] #Output a pact file print(save_pact_file(dict_output, 
self.directory + self.output_prefix + '_dataset')) return
def protocol(self): """Provide a protocol that does general analyses that don't need a full protocol""" #Create a output log file that we can append to with open(self.directory + self.output_prefix + "_" + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S") + '_output.txt', 'w') as file_output: file_output.write(self.pact_preamble + "\n") """ ***************************************** Pact Combine (Required) ***************************************** """ #Check to see if the section is there if not self.obj_cfgparser.has_section('combinepact'): print("[Protocols:Analysis Error] The combinepact config file is incorrect.") print("[Protocols:Analysis Error] There is something wrong with the [combinepact] section.") quit() #Import our combinepact class try: from pact.analysis.combine_pact import combine_pact except ImportError: print("[Protocols:Analysis Error] combine_pact was not found.") #Create the object then call the merger obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {}) #Print Section Progress print("[Protocols:Analysis] Combine PACT") #The dict will be like {'dataset name': {data... 
dict_merged_datasets = obj_combine.combine_pact() """ ***************************************** T-Test of Two Groups ***************************************** """ if self.dict_workflow['aa_compare_ttest']: #Check to see if the section is there if not self.obj_cfgparser.has_section('aa_compare_ttest'): print("[Protocols:Analysis Error] The aa_compare_ttest config file is incorrect.") print("[Protocols:Analysis Error] There is something wrong with the [aa_compare_ttest] section.") quit() #Import our class try: from pact.analysis.sequence.aa_fitmet_compare import aa_fitmet_compare except ImportError: print("[Protocols:Analysis Error] aa_fitmet_compare was not found.") #Create the object then call the merger obj_aac = aa_fitmet_compare(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #Run the main routine print("[Protocols:Analysis] T-Test of amino acid groups") file_output.write(obj_aac.aa_fitmet_compare(dict_merged_datasets)) """ ***************************************** Count our mutations ***************************************** """ if self.dict_workflow['threshold_count']: #Check to see if the section is there if not self.obj_cfgparser.has_section('threshold_count'): print("[Protocols:Analysis Error] The threshold_count config file is incorrect.") print("[Protocols:Analysis Error] There is something wrong with the [threshold_count] section.") quit() #Create our object try: from pact.analysis.sequence.threshold_count import threshold_count except ImportError: print("[Protocols:Analysis Error] threshold_count was not found.") #Create the object then call the analysis obj_tc = threshold_count(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #Count them print("[Protocols:Analysis] Count of mutations above and below a cutoff") file_output.write(obj_tc.threshold_count(dict_merged_datasets)) return
def protocol(self): """Main entrypoint for the protocol""" #Create a output log file that we can append to with open(self.directory + self.output_prefix + "_" + strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S") + '_output.txt', 'w') as file_output: file_output.write(self.pact_preamble + "\n") """ ***************************************** Pact Combine (Required) ***************************************** """ #Check to see if the section is there if not self.obj_cfgparser.has_section('combinepact'): print("[Protocols:Enzyme Solubility Error] The combinepact config file is incorrect.") print("[Protocols:Enzyme Solubility Error] There is something wrong with the [combinepact] section.") quit() #Import our combinepact class try: from pact.analysis.combine_pact import combine_pact except ImportError: print("[Protocols:Enzyme Solubility Error] combine_pact was not found.") #Create the object then call the merger obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {}) #The dict will be like {'dataset name': {data... 
dict_merged_datasets = obj_combine.combine_pact() """ ***************************************** Classify our mutations ***************************************** """ #Build a dict_classified with [location][mutation] = "DEL/SLIGHTDEL/NEU/NONE" #0.15 in GFP, 80% of WT = neutral, 50% of WT = slightly, <50% of WT = deleterious #Get the config file elements try: screen_dataset = self.obj_cfgparser.get("enzyme_solubility", "dataset_screen") screen_threshold = float(self.obj_cfgparser.get("enzyme_solubility", "screen_threshold")) fitness_dataset = self.obj_cfgparser.get("enzyme_solubility", "dataset_fitness") fitness_neutral = float(self.obj_cfgparser.get("enzyme_solubility", "fitness_neu")) fitness_slightdel = float(self.obj_cfgparser.get("enzyme_solubility", "fitness_slightdel")) except NoOptionError: print("[Enzyme Solubility Error] Missing [enzyme_solubility] config file elements.") quit() except ValueError: print("[Enzyme Solubility Error] Incorrect [enzyme_solubility] config file elements.") quit() except TypeError: print("[Enzyme Solubility Error] Incorrect [enzyme_solubility] config file elements.") quit() #Make a dict to add our classifications into dict_classified = {} dict_basal = {} #Loop the locations for loc in dict_merged_datasets[screen_dataset]: #Add a new location if not in the dict if loc not in dict_classified: dict_classified[loc] = {} #Add a new location if not in the dict if loc not in dict_basal: dict_basal[loc] = {} #Loop the muts for mut in dict_merged_datasets[screen_dataset][loc]: #Skip WT, stop, and NaN if (mut == dict_merged_datasets[fitness_dataset][loc][mut]['wt_residue'] or mut == "*" or dict_merged_datasets[fitness_dataset][loc][mut]['fitness'] == "NaN"): dict_basal[loc][mut] = "UNCLASSIFIED" continue #Get the fitness value from the fitness dataset fitness_value = float(dict_merged_datasets[fitness_dataset][loc][mut]['fitness']) #For the basal screen fitness #Assign a classification of deleterious, slightly deleterious, or neutral if 
fitness_value < fitness_slightdel: dict_basal[loc][mut] = "DEL" elif (fitness_value >= fitness_slightdel and fitness_value < fitness_neutral): dict_basal[loc][mut] = "SLIGHTDEL" elif fitness_value >= fitness_neutral: dict_basal[loc][mut] = "NEU" #Skip WT, stop, and NaN if (mut == dict_merged_datasets[screen_dataset][loc][mut]['wt_residue'] or mut == "*" or dict_merged_datasets[screen_dataset][loc][mut]['fitness'] == "NaN"): dict_classified[loc][mut] = "UNCLASSIFIED" continue #Are we are enriched in the screen dataset? if float(dict_merged_datasets[screen_dataset][loc][mut]['fitness']) < screen_threshold: dict_classified[loc][mut] = "UNCLASSIFIED" continue #Assign a classification of deleterious, slightly deleterious, or neutral if fitness_value < fitness_slightdel: dict_classified[loc][mut] = "DEL" elif (fitness_value >= fitness_slightdel and fitness_value < fitness_neutral): dict_classified[loc][mut] = "SLIGHTDEL" elif fitness_value >= fitness_neutral: dict_classified[loc][mut] = "NEU" """ ***************************************** Count the basal classifiers ***************************************** """ if self.dict_workflow['basal_count']: #Import our class try: from pact.analysis.basal_count import basal_count except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.basal_count was not found.") quit() #Create our object obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {}) #Count our basal rates print("[Protocols:Enzyme Solubility] Basal Screen Counts") file_output.write("[Protocols:Enzyme Solubility] Basal Screen Counts") file_output.write(obj_basal.basal_count(dict_basal) + "\n") print("[Protocols:Enzyme Solubility] Basal Fitness Counts") file_output.write("[Protocols:Enzyme Solubility] Basal Fitness Counts") file_output.write(obj_basal.basal_count(dict_classified) + "\n") """ ***************************************** DNA Filtering/Alignment or PSSM Object ***************************************** """ if 
self.dict_workflow['blastp_align_filter'] or self.dict_workflow['pssm']: #Import Check Output from subprocess import check_output #Import our class try: from pact.analysis.sequence.homology_pssm import homology_classifier except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.sequence.homology_pssm was not found.") quit() #Create our object obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) """ ***************************************** DNA Filtering/Alignment ***************************************** """ if self.dict_workflow['blastp_align_filter']: #Convert our XML file print("[Protocols:Enzyme Solubility] xml_to_fasta") file_output.write("[Protocols:Enzyme Solubility] xml_to_fasta\n") obj_homology.xml_to_fasta() #Run CD-HIT on our new fasta file print("[Protocols:Enzyme Solubility] cdhit") file_output.write("[Protocols:Enzyme Solubility] cdhit\n") #Check to see if the number of processes is logical self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes") if int(self.processes) <= 0: self.processes = "2" check_output([self.dict_programs['cdhit'], "-i", self.directory + self.output_prefix + ".fa", "-o", self.directory + self.output_prefix + ".afa", "-c", str(self.obj_cfgparser.get("blastp_align_filter", "cdhit_clustering_threshold")), "-M", "40000", "-T", str(self.processes)]) #Check to see if we have WT in our cdhit output print("[Protocols:Enzyme Solubility] cdhit_wtcheck") file_output.write("[Protocols:Enzyme Solubility] cdhit_wtcheck\n") obj_homology.cdhit_wt_check() #Run MUSCLE on our new fasta file print("[Protocols:Enzyme Solubility] muscle") file_output.write("[Protocols:Enzyme Solubility] muscle\n") check_output([self.dict_programs['muscle'], "-in", self.directory + self.output_prefix + ".afa", "-out", self.directory + self.output_prefix + ".msa"]) #Process our MSA (needs to be on for PSIBlast) print("[Protocols:Enzyme Solubility] processmsa") 
file_output.write("[Protocols:Enzyme Solubility] processmsa\n") list_msa = obj_homology.process_msa() #Save our list print("[Protocols:Enzyme Solubility] Saving our MSA") file_output.write("[Protocols:Enzyme Solubility] Saving our MSA\n") obj_homology.save_data_structure(list_msa, "list_msa") """ ***************************************** PSSM ***************************************** """ if self.dict_workflow['pssm'] or self.dict_workflow['strict_filter']: #Open our list print("[Protocols:Enzyme Solubility] Opening our MSA") file_output.write("[Protocols:Enzyme Solubility] Opening our MSA\n") list_msa = obj_homology.open_data_structure("list_msa") #Split our msa for PSIBlast (needs to be on for PSIBlast) print("[Protocols:Enzyme Solubility] msa_split") file_output.write("[Protocols:Enzyme Solubility] msa_split\n") list_pbcmds = obj_homology.msa_split(list_msa) #Run PSIBlast print("[Protocols:Enzyme Solubility] psiblast") file_output.write("[Protocols:Enzyme Solubility] psiblast\n") for command in list_pbcmds: check_output([self.dict_programs['psiblast'], *command]) #Import our PSSM data print("[Protocols:Enzyme Solubility] pssm_file_import") file_output.write("[Protocols:Enzyme Solubility] pssm_file_import\n") dict_pssm = obj_homology.pssm_file_import() #Save our heatmap print("[Protocols:Enzyme Solubility] Saving a PSSM .csv heatmap") file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n") #Save our csv print("[Protocols:Enzyme Solubility] Saving a PSSM .csv column data") file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n") #Save our PACT File print("[Protocols:Enzyme Solubility] Saving a PSSM .pact file") file_output.write(obj_homology.save_data_structure(dict_pssm, "PSSM") + "\n") #Count our classifiers print("[Protocols:Enzyme Solubility] PSSM") file_output.write("[Protocols:Enzyme Solubility] PSSM") print("Fitness Rates:") file_output.write("Fitness Rates:") file_output.write(obj_homology.classified_count_pssm(dict_pssm, dict_basal) + 
"\n") print("\nScreen Rates:") file_output.write("\nScreen Rates:") file_output.write(obj_homology.classified_count_pssm(dict_pssm, dict_classified) + "\n") """ ***************************************** Residue Chemical/Size ***************************************** """ if self.dict_workflow['residue_chemical_size']: #Import our residue_chemical_size class try: from pact.analysis.sequence.residue_chemical_size import residue_chemical_size except ImportError: print("[Protocols:Enzyme Solubility Error] residue_chemical_size was not found.") #Create the object then call the merger obj_rcs = residue_chemical_size(self.obj_cfgparser, self.dict_programs, {}) #Return the process dict {1: {'A': {'' dict_rcs = obj_rcs.process_dataset(dict_merged_datasets) #Count our classifiers print("[Protocols:Enzyme Solubility] Residue Chemical/Size") file_output.write("[Protocols:Enzyme Solubility] Residue Chemical/Size") print("Fitness Rates:") file_output.write("Fitness Rates:") file_output.write(obj_rcs.classified_count(dict_rcs, fitness_dataset, dict_basal) + "\n") print("\nScreen Rates:") file_output.write("\nScreen Rates:") file_output.write(obj_rcs.classified_count(dict_rcs, screen_dataset, dict_classified) + "\n") """ ***************************************** PDB Import Section ***************************************** """ #Only import and run if selected if (self.dict_workflow['pdb_import'] or self.dict_workflow['distance_to_active'] or self.dict_workflow['contact_number'] or self.dict_workflow['strict_filter']): #Check to see if the section is there if not self.obj_cfgparser.has_section('pdb_import'): print("[Protocols:Enzyme Solubility Error] The pdb_import config file is incorrect.") print("[Protocols:Enzyme Solubility Error] There is something wrong with the [pdb_import] section.") quit() #Import our combinepact class try: from pact.analysis.pdb_import import pdb_import except ImportError: print("[Protocols:Enzyme Solubility Error] pdb_import was not found.") #Create the 
object then call the merger obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory}) #The dict will be like {'pdb name': {data... dict_pdb = obj_pdb.pdb_import() """ ***************************************** Distance to Active Site ***************************************** """ #Only import and run if selected if self.dict_workflow['distance_to_active'] or self.dict_workflow['strict_filter']: #Check to see if the section is there if not self.obj_cfgparser.has_section('distance_to_active'): print("[Protocols:Enzyme Solubility Error] The distance_to_active config file is incorrect.") print("[Protocols:Enzyme Solubility Error] There is something wrong with the [distance_to_active] section.") quit() #Import our class try: from pact.analysis.structure.dist_to_active import dist_to_active except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.structure.dist_to_active was not found.") quit() #Create our object obj_dtoa = dist_to_active(self.obj_cfgparser, self.dict_programs, {}) #Calculate the distance dict_dtoa_dist = obj_dtoa.dta_dist(dict_pdb) #Count our classifiers print("[Protocols:Enzyme Solubility] Distance to Active Site") file_output.write("[Protocols:Enzyme Solubility] Distance to Active Site") print("Fitness Rates:") file_output.write("Fitness Rates:") file_output.write(obj_dtoa.classified_count(dict_dtoa_dist, dict_basal) + "\n") print("\nScreen Rates:") file_output.write("\nScreen Rates:") file_output.write(obj_dtoa.classified_count(dict_dtoa_dist, dict_classified) + "\n") """ ***************************************** Contact Number ***************************************** """ #Only import and run if selected if self.dict_workflow['contact_number'] or self.dict_workflow['strict_filter']: #Check to see if the section is there if not self.obj_cfgparser.has_section('contact_number'): print("[Protocols:Enzyme Solubility Error] The contact_number config file is incorrect.") print("[Protocols:Enzyme Solubility 
Error] There is something wrong with the [contact_number] section.") quit() #Import our class try: from pact.analysis.structure.contact_number import contact_number except ImportError: print("[Protocols:Enzyme Solubility] pact.analysis.structure.contact_number was not found.") quit() #Create our object obj_contact = contact_number(self.obj_cfgparser, self.dict_programs, {}) #Calculate the distance dict_contact = obj_contact.contact_number(dict_pdb) #Count our classifiers print("[Protocols:Enzyme Solubility] Contact Number") file_output.write("[Protocols:Enzyme Solubility] Contact Number") print("Fitness Rates:") file_output.write("Fitness Rates:") file_output.write(obj_contact.classified_count(dict_contact, dict_basal) + "\n") print("\nScreen Rates:") file_output.write("\nScreen Rates:") file_output.write(obj_contact.classified_count(dict_contact, dict_classified) + "\n") """ ***************************************** Strict Enzyme Filter ***************************************** """ #Only import and run if selected if self.dict_workflow['strict_filter']: print("[Protocols:Enzyme Solubility] Strict Enzyme Filter") file_output.write("[Protocols:Enzyme Solubility] Strict Enzyme Filter") #Check if the dicts exist if 'dict_pssm' not in locals(): print("[Protocols:Enzyme Solubility] Missing PSSM Data") file_output.write("[Protocols:Enzyme Solubility] Missing PSSM Data") quit() if 'dict_contact' not in locals(): print("[Protocols:Enzyme Solubility] Missing Contact Number Data") file_output.write("[Protocols:Enzyme Solubility] Missing Contact Number Data") quit() if 'dict_dtoa_dist' not in locals(): print("[Protocols:Enzyme Solubility] Missing Active Site Distance Data") file_output.write("[Protocols:Enzyme Solubility] Missing Active Site Distance Data") quit() #Implement a filter that #PSSM >= 0 #Distance to Active >= 15A #Contact Number <= 16 #No proline mutations #Create a list to work into list_strictfilter = [] #Loop the locations for loc in dict_classified: #Loop the 
mutations for mut in dict_classified[loc]: #Skip PRO, and Stop if (mut == "P" or mut == "*" or dict_merged_datasets[screen_dataset][loc][mut]['wt_residue'] == "P"): continue #Check if PSSM is less than 0 if int(dict_pssm[loc][mut][0]) < 0: continue #Skip residues without location data if loc not in dict_contact or loc not in dict_dtoa_dist: continue #Check if Distance to Active is less than 15A if min(dict_dtoa_dist[loc]) < 15: continue #Check if the contact number is greater than 16 if len(dict_contact[loc]) > 16: continue #Otherwise, add to our list list_strictfilter.append(dict_classified[loc][mut]) #Report str_return = '\n'.join(map(str, [ "Enzyme Strict Filter", pretty_counter_dicts(dict(Counter(list_strictfilter))) ])) print(str_return) file_output.write(str_return) return