# Load the two Apo output files that are compared below.
pof = analysis.ParseOutputFile(outputdir + "/models_scores_sigmas-Apo.dat", "Apo")
pof2 = analysis.ParseOutputFile(outputdir + "/models_scores_sigmas-Apo2.dat", "Apo2")

# Generate the datasets for each parsed output, first file first.
for parsed in (pof, pof2):
    parsed.generate_datasets()

# Optional per-file check, currently switched off:
#pof.calculate_random_sample_convergence()
#pof2.calculate_random_sample_convergence()

# Build the convergence object from both outputs (the third argument, 500, is
# the same count passed to the plotting calls below) and print its
# total-score p-value / Cohen's d result.
conv = analysis.Convergence(pof, pof2, 500)
print(conv.total_score_pvalue_and_cohensd())

# `ranges` is only consumed by the disabled clustering call that follows.
ranges = [0.01, 0.1, 0.2, 0.3, 0.4]
#print(conv.get_clusters(ranges))
#exit()
#print(conv.residue_pvalue_and_cohensd())

# Incorporation-curve fits: one output sub-directory per parsed file.
for parsed, subdir in ((pof, "/incorporation_plots/"), (pof2, "/incorporation_plots2/")):
    plots.plot_incorporation_curve_fits(parsed, 500, outputdir + subdir)

# Total-score plots: one PNG per parsed file.
for parsed, image in ((pof, "/apo_total_score.png"), (pof2, "/apo2_total_score.png")):
    plots.plot_po_model_scores(parsed, False, outputdir + image, 500)

# Protection factors of both files drawn together.
plots.plot_residue_protection_factors(
    [pof, pof2],
    num_best_models=500,
    sort_sectors=True,
)
sequence = "GMAEDMAADEVTAPPRKVLIISAGASHSVALLSGDIVCSWGRGEDGQLGHGDAEDRPSPTQLSALDGHQIVSVTCGADHTVAYSQSGMEVYSWGWGDFGRLGHGNSSDLFTPLPIKALHGIRIKQIACGDSHCLAVTMEGEVQSWGRNQNGQLGLGDTEDSLVPQKIQAFEGIRIKMVAAGAEHTAAVTEDGDLYGWGWGRYGNLGLGDRTDRLVPERVTSTGGEKMSMVACGWRHTISVSYSGALYTYGWSKYGQLGHGDLEDHLIPHKLEALSNSFISQISGGWRHTMALTSDGKLYGWGWNKFGQVGVGNNLDQCSPVQVRFPDDQKVVQVSCGWRHTLAVTERNNVFAWGRGTNGQLGIGESVDRNFPKIIEALSVDGASGQHIESSNIDPSSGKSWVSPAERYAVVPDETGLTDGSSKGNGGDISVPQTDVKRVRI" # FASTA sequence

resrange = (100, 200 )    # Residue range is a tuple in pdb numbering (starts at 1).
num_best_models = 200

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###   Analysis.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Initialize System
# NOTE(review): the name `sys` would shadow the standard-library module of the
# same name if that module is imported in this file; left unchanged because
# code outside this excerpt may refer to it.
sys = system.System(output_dir=None)
mol = sys.add_macromolecule(sequence, "ERa")
state = mol.get_apo_state()
#mol.add_state("088074")

# One parsed output per run directory, all read against the same apo state.
pof = analysis.ParseOutputFile(outputdir + "/models_scores_sigmas-ERa_Apo.dat", state)
pof2 = analysis.ParseOutputFile(outputdir2 + "/models_scores_sigmas-ERa_Apo.dat", state)
pof3 = analysis.ParseOutputFile(outputdir3 + "/models_scores_sigmas-ERa_Apo.dat", state)
pof4 = analysis.ParseOutputFile(outputdir4 + "/models_scores_sigmas-ERa_Apo.dat", state)
# The outputdir5 parse used to be bound to `pof4` as well, which threw away the
# outputdir4 result before it was ever plotted; it now has its own name.
pof5 = analysis.ParseOutputFile(outputdir5 + "/models_scores_sigmas-ERa_Apo.dat", state)

# NOTE(review): this call plots the literal window (240, 260); the `resrange`
# defined above is not used in this excerpt -- confirm that is intended.
plots.plot_residue_protection_factors(
    [pof, pof2, pof3, pof4, pof5],
    num_best_models=num_best_models,
    resrange=(240, 260),
)
# Cluster at the sampling precision. The result is a list of POF objects, one
# per cluster, each holding that cluster's models; they feed the analysis and
# plotting steps that follow.
pofs = conv.cluster_at_threshold_and_return_pofs(sampling_precision)

# Protection-factor distribution curves.
# Works for roughly up to five POF objects; past that the colors run out and
# the figure becomes too crowded to read.
#   pofs            : list of POF objects to draw
#   num_best_models : number of best models per POF; use "all" to take every
#                     model (the right choice once clustering has been done)
#   resrange        : residue range to plot
#   true_vals       : per-residue numerical (log) protection values, shown as
#                     horizontal green lines; default is None
#   sort_sectors    : sort residues within sectors by increasing Pf per model
#   outputdir       : where the output goes
plots.plot_residue_protection_factors(
    pofs,
    num_best_models=num_best_models,
    resrange=resrange,
    true_vals=res_pfs,
    sort_sectors=True,
    outputdir="./",
)

# Incorporation curves.
# The individual images can be stitched together with imagemagick `montage`.
# Name

# The script stops here; the call below is not reached while this exit() is
# in place.
exit()

plots.plot_incorporation_curve_fits(
    pofs[0],
    num_best_models,
    write_plots=True,
    output_directory=outputdir,
)
import plots
import cProfile

##########################################
###    File/Directory Setup

# Result directories of the two simulation runs being compared.
outputdir = "./testing_heurtemp"
outputdir2 = "./testing_heurtemp2"

# FASTA sequence of the macromolecule.
sequence = "GMAEDMAADEVTAPPRKVLIISAGASHSVALLSGDIVCSWGRGEDGQLGHGDAEDRPSPTQLSALDGHQIVSVTCGADHTVAYSQSGMEVYSWGWGDFGRLGHGNSSDLFTPLPIKALHGIRIKQIACGDSHCLAVTMEGEVQSWGRNQNGQLGLGDTEDSLVPQKIQAFEGIRIKMVAAGAEHTAAVTEDGDLYGWGWGRYGNLGLGDRTDRLVPERVTSTGGEKMSMVACGWRHTISVSYSGALYTYGWSKYGQLGHGDLEDHLIPHKLEALSNSFISQISGGWRHTMALTSDGKLYGWGWNKFGQVGVGNNLDQCSPVQVRFPDDQKVVQVSCGWRHTLAVTERNNVFAWGRGTNGQLGIGESVDRNFPKIIEALSVDGASGQHIESSNIDPSSGKSWVSPAERYAVVPDETGLTDGSSKGNGGDISVPQTDVKRVRI"

# Residue range as a (first, last) tuple in pdb numbering, which starts at 1.
resrange = (100, 200)
num_best_models = 1000

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###   Analysis.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Set up the system: a single "ERa" macromolecule and its apo state.
sys = system.System(output_dir=None)
mol = sys.add_macromolecule(sequence, "ERa")
state = mol.get_apo_state()
#mol.add_state("088074")

# Read the Apo output of each run against that state.
pof = analysis.ParseOutputFile(
    outputdir + "/models_scores_sigmas-Apo.dat",
    state,
)
pof2 = analysis.ParseOutputFile(
    outputdir2 + "/models_scores_sigmas-Apo.dat",
    state,
)

# Draw the protection factors of both runs together.
plots.plot_residue_protection_factors(
    [pof, pof2],
    num_best_models=num_best_models,
)
#sys.output.initialize_output_model_file(state, output_model.pf_grids) sampler = sampling.MCSampler(sys) sampler.run(10000, 2.0, write=True) pof = analysis.ParseOutputFile( output_dir_sample + "/models_scores_sigmas-Apo.dat", states[0]) pof1 = analysis.ParseOutputFile( output_dir_sample + "/models_scores_sigmas-Apo1.dat", states[1]) pof2 = analysis.ParseOutputFile( output_dir_sample + "/models_scores_sigmas-Apo2.dat", states[2]) pof3 = analysis.ParseOutputFile( output_dir_sample + "/models_scores_sigmas-Apo3.dat", states[3]) plots.plot_residue_protection_factors([pof, pof1, pof2, pof3], num_best_models=1000, sort_sectors=True, show=True) #plots.plot_po_model_scores(pof) #plots.plot_po_model_scores(pof2) #for i in range(2,10): # pof2.cluster_models_kmeans(nmodels=1000, nclust=i) ''' exit() for pep in dataset.get_peptides(): for tp in pep.get_timepoints(): #try: i = tp.get_replicates()[0] rep_score = -1*math.log(state.scoring_function.replicate_score(tp.get_model_deuteration()/pep.num_observable_amides*100, tp.get_replicates()[0].deut, tp.get_sigma()))