def make_bf_TIR():
    """Write "gtr" and "nest" HYPHY batch files for a range of TIR window lengths.

    Changes the working directory to the ``combinedP1`` work path, builds one
    ``BfH.HYPHYBatchFile`` per model file and gives both the tree returned by
    ``SS.ecoli()``.  For each of the 15 window lengths 30, 35, ..., 100 it
    calls ``gene_conjunction`` to produce ``TIR<length>.input`` in the work
    path and then writes ``TIR<length>gtr.bf`` and ``TIR<length>nest.bf``.
    """
    # Raw literals: the backslashes in these Windows paths must stay literal
    # instead of depending on them being unrecognised escape sequences.
    workpath = r"d:\Workspace\Ecoli\combinedP1"
    aln_files = r"d:\Workspace\Ecoli\ecoli_10_species"
    os.chdir(workpath)

    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"
    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")
    bf_maker_gtr.set_tree_from_outside(SS.ecoli())
    bf_maker_nest.set_tree_from_outside(SS.ecoli())

    for window_width_offset in range(15):
        length_window = 30 + 5 * window_width_offset
        # One name serves both the extraction step (as a full path) and the
        # batch files (relative to the current working directory).
        input_file_name = "TIR%d.input" % length_window
        extracted_input_file = os.path.join(workpath, input_file_name)
        gene_conjunction(aln_files, extracted_input_file, length_window)

        bf_maker_gtr.write_batch_file(
            dot_input=input_file_name,
            dot_aln="",
            hyphy_batch_file="TIR%dgtr.bf" % length_window,
            hyphy_result_file="TIR%dgtr.result" % length_window)
        bf_maker_nest.write_batch_file(
            dot_input=input_file_name,
            dot_aln="",
            hyphy_batch_file="TIR%dnest.bf" % length_window,
            hyphy_result_file="TIR%dnest.result" % length_window)
def make_bf_p2():
    """Write per-gene "gtr" and "nest" HYPHY batch files with a partition at 51.

    Changes the working directory to the ``P2`` work path, then walks every
    ``*.aln`` file in the alignment folder.  Each alignment is converted to
    ``<jobid>.input`` (``jobid`` is the file name up to its first dot).  Genes
    whose length, as reported by ``DH.aln_info``, is below 52 are reported on
    stdout and skipped; for the rest both batch-file makers get the partition
    ``(51, gene_full_length)`` and write ``<jobid>p2gtr.bf`` /
    ``<jobid>p2nest.bf``.
    """
    # Raw literals: the backslashes in these Windows paths must stay literal
    # instead of depending on them being unrecognised escape sequences.
    workpath = r"d:\Workspace\Ecoli\P2"
    aln_file_folder = r"d:\Workspace\Ecoli\ecoli_10_species"
    os.chdir(workpath)

    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")
    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")
    bf_maker_gtr.set_tree_from_outside(SS.ecoli())
    bf_maker_nest.set_tree_from_outside(SS.ecoli())

    aln_files = [file_single for file_single in os.listdir(aln_file_folder)
                 if os.path.splitext(file_single)[-1] == ".aln"]

    for single_aln in aln_files:
        aln_full_path = os.path.join(aln_file_folder, single_aln)
        # Only the lengths are needed; the first returned value is unused.
        _, lengths = DH.aln_info(aln_full_path)
        gene_full_length = lengths[0]

        jobid = single_aln.split(".")[0]
        input_file_name = "%s.input" % jobid
        # The .input file is written before the length check, so genes that
        # are skipped below still get one.
        DH.aln2input(aln_full_path, input_file_name)

        if gene_full_length < 52:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%s too short ---" % single_aln)
            continue

        bf_maker_gtr.set_partition(51, gene_full_length)
        bf_maker_nest.set_partition(51, gene_full_length)
        bf_maker_gtr.write_batch_file(
            dot_input=input_file_name,
            dot_aln="",
            hyphy_batch_file="%sp2gtr.bf" % jobid,
            hyphy_result_file="%sp2gtr.result" % jobid)
        bf_maker_nest.write_batch_file(
            dot_input=input_file_name,
            dot_aln="",
            hyphy_batch_file="%sp2nest.bf" % jobid,
            hyphy_result_file="%sp2nest.result" % jobid)
def sliding_window():
    """Write "gtr" and "nest" HYPHY batch files over a grid of window positions.

    Changes the working directory to the ``slidingWindow`` work path and, for
    every combination of 15 start sites (0, 5, ..., 70) and 15 window lengths
    (30, 35, ..., 100), calls ``gene_conjunction`` to produce
    ``TIRw<length>s<start>.input`` and writes one batch file per model for it.
    The full path of each input file is printed as it is generated.

    NOTE: this module defines ``sliding_window`` a second time further down;
    at import time that later definition replaces this one.
    """
    # Raw literals: the backslashes in these Windows paths must stay literal
    # instead of depending on them being unrecognised escape sequences.
    workpath = r"d:\Workspace\Ecoli\slidingWindow"
    aln_files = r"d:\Workspace\Ecoli\ecoli_10_species"
    os.chdir(workpath)

    model_gtr = "rebuild_model.mdl"
    model_nest = "rna_full_length_structure.mdl"
    bf_maker_nest = BfH.HYPHYBatchFile(species_name="ecoli",
                                       model_file=model_nest,
                                       bf_template_file="templateNoGap")
    bf_maker_gtr = BfH.HYPHYBatchFile(species_name="ecoli",
                                      model_file=model_gtr,
                                      bf_template_file="templateNoGap")
    bf_maker_gtr.set_tree_from_outside(SS.ecoli())
    bf_maker_nest.set_tree_from_outside(SS.ecoli())

    step_num = 15
    step_width_step = 5
    window_width_min = 30
    site_start_point = 0
    site_shift_step = 5

    for window_start_offset in range(step_num):
        # Depends only on the outer loop variable, so compute it once here.
        start_site = site_shift_step * window_start_offset + site_start_point
        for window_width_offset in range(step_num):
            length_window = (step_width_step * window_width_offset
                             + window_width_min)
            job_description = "w%ds%d" % (length_window, start_site)
            input_file_name = "TIR%s.input" % job_description
            extracted_input_file = os.path.join(workpath, input_file_name)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(extracted_input_file)
            gene_conjunction(aln_files, extracted_input_file,
                             length_window, start_site)

            bf_maker_gtr.write_batch_file(
                dot_input=input_file_name,
                dot_aln="",
                hyphy_batch_file="TIR%sgtr.bf" % job_description,
                hyphy_result_file="TIR%sgtr.result" % job_description)
            bf_maker_nest.write_batch_file(
                dot_input=input_file_name,
                dot_aln="",
                hyphy_batch_file="TIR%snest.bf" % job_description,
                hyphy_result_file="TIR%snest.result" % job_description)
def sliding_window():
    """Produce sliding-window inputs and HYPHY batch files for the alignments.

    Runs inside the ``test_slidingWindow`` directory.  Window lengths 30 and
    60 are each paired with 21 start sites (0, 15, ..., 300).  For every pair,
    ``gene_conjunction`` is called twice: once with its default trailing
    argument, and once with ``False`` to create the ``N``-suffixed input
    (presumably the no-gap variant, going by the file naming).  Both batch
    file makers then write a batch file for each of the two inputs.
    """
    target_dir = "/Users/zerodel/WorkSpace/test/test_slidingWindow"  # path was changed deliberately
    os.chdir(target_dir)

    gtr_model_file = "rebuild_model.mdl"
    nest_model_file = "rna_full_length_structure.mdl"
    alignment_source = "/Users/zerodel/WorkSpace/ecoli_aln"

    nest_writer = BfH.HYPHYBatchFile(
        species_name="ecoli",
        model_file=nest_model_file,
        bf_template_file="templateNoGap",
    )
    gtr_writer = BfH.HYPHYBatchFile(
        species_name="ecoli",
        model_file=gtr_model_file,
        bf_template_file="templateNoGap",
    )
    gtr_writer.set_tree_from_outside(SS.ecoli())
    nest_writer.set_tree_from_outside(SS.ecoli())

    start_count = 21
    width_increment = 15
    smallest_width = 30
    first_site = 0
    site_increment = 15

    width_multipliers = [0, 2]
    start_indices = range(start_count)

    for width_multiplier in width_multipliers:
        for start_index in start_indices:
            start_site = first_site + start_index * site_increment
            length_window = smallest_width + width_multiplier * width_increment

            tag = "w%ds%d" % (length_window, start_site)
            gapped_input = "TIR%s.input" % tag
            no_gap_input = "TIR%sN.input" % tag

            # Extract both flavours of input before any batch file is written.
            gene_conjunction(alignment_source,
                             os.path.join(target_dir, gapped_input),
                             length_window,
                             start_site)
            gene_conjunction(alignment_source,
                             os.path.join(target_dir, no_gap_input),
                             length_window,
                             start_site,
                             False)

            # Batch files for the default input.
            gtr_writer.write_batch_file(
                dot_input=gapped_input,
                dot_aln="",
                hyphy_batch_file="TIR%sgtr.bf" % tag,
                hyphy_result_file="TIR%sgtr.result" % tag,
            )
            nest_writer.write_batch_file(
                dot_input=gapped_input,
                dot_aln="",
                hyphy_batch_file="TIR%snest.bf" % tag,
                hyphy_result_file="TIR%snest.result" % tag,
            )

            # Batch files for the "N" input.
            gtr_writer.write_batch_file(
                dot_input=no_gap_input,
                dot_aln="",
                hyphy_batch_file="TIR%sgtrN.bf" % tag,
                hyphy_result_file="TIR%sgtrN.result" % tag,
            )
            nest_writer.write_batch_file(
                dot_input=no_gap_input,
                dot_aln="",
                hyphy_batch_file="TIR%snestN.bf" % tag,
                hyphy_result_file="TIR%snestN.result" % tag,
            )
# NOTE(review): a header-less copy of the tail of make_bf_p2's loop body
# (using `continue` and names not bound in any enclosing scope) sat here as
# merge debris; make_bf_p2 itself still contains that logic in full.
if __name__ == "__main__":
    # Merge conflict resolved in favour of HEAD; the incoming side
    # (f0acd743d1106c96b88083b2df2cb3526b388aec) printed SS.ecoli() instead.
    # Confirm this is the intended side.
    print("hello world")